granicus.if.org Git - icu/commitdiff
ICU-21650 Modified DateTimePatternGenerator to handle skeletons with "e" and "c"...
author Rich Gillam <62772518+richgillam@users.noreply.github.com>
Tue, 6 Jul 2021 20:56:15 +0000 (13:56 -0700)
committer Rich Gillam <62772518+richgillam@users.noreply.github.com>
Fri, 16 Jul 2021 01:16:51 +0000 (18:16 -0700)
day-of-week abbreviations).

icu4c/source/i18n/dtptngen.cpp
icu4c/source/test/intltest/dtptngts.cpp
icu4c/source/test/intltest/dtptngts.h
icu4j/main/classes/core/src/com/ibm/icu/text/DateTimePatternGenerator.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java

index 21f2362d171cb483f694e2a6f1849ad9daad8298..aefa96fcd1a5b8eb40c1acd93d19254ca9d6d15b 100644 (file)
@@ -1648,7 +1648,11 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern,
                          (typeValue==UDATPG_MINUTE_FIELD && (options & UDATPG_MATCH_MINUTE_FIELD_LENGTH)==0) ||
                          (typeValue==UDATPG_SECOND_FIELD && (options & UDATPG_MATCH_SECOND_FIELD_LENGTH)==0) ) {
                          adjFieldLen = field.length();
-                    } else if (specifiedSkeleton) {
+                    } else if (specifiedSkeleton && reqFieldChar != LOW_C && reqFieldChar != LOW_E) {
+                        // (we skip this section for 'c' and 'e' because unlike the other characters considered in this function,
+                        // they have no minimum field length-- 'E' and 'EE' are equivalent to 'EEE', but 'e' and 'ee' are not
+                        // equivalent to 'eee' -- see the entries for "week day" in
+                        // https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table for more info)
                         int32_t skelFieldLen = specifiedSkeleton->original.getFieldLength(typeValue);
                         UBool patFieldIsNumeric = (row->type > 0);
                         UBool skelFieldIsNumeric = (specifiedSkeleton->type[typeValue] > 0);
@@ -1663,6 +1667,9 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern,
                             && (typeValue!= UDATPG_YEAR_FIELD || reqFieldChar==CAP_Y))
                             ? reqFieldChar
                             : field.charAt(0);
+                    if (c == CAP_E && adjFieldLen < 3) {
+                        c = LOW_E;
+                    }
                     if (typeValue == UDATPG_HOUR_FIELD && fDefaultHourFormatChar != 0) {
                         // The adjustment here is required to match spec (https://www.unicode.org/reports/tr35/tr35-dates.html#dfst-hour).
                         // It is necessary to match the hour-cycle preferred by the Locale.
index 9104aa7035bff64b9aab97fdeeef2e85978fac32..9b0e7b5e3b38d77bc3f49e84d264986bf039f742 100644 (file)
@@ -45,6 +45,7 @@ void IntlTestDateTimePatternGeneratorAPI::runIndexedTest( int32_t index, UBool e
         TESTCASE(9, testFallbackWithDefaultRootLocale);
         TESTCASE(10, testGetDefaultHourCycle_OnEmptyInstance);
         TESTCASE(11, test_jConsistencyOddLocales);
+        TESTCASE(12, testBestPattern);
         default: name = ""; break;
     }
 }
@@ -1557,4 +1558,69 @@ void IntlTestDateTimePatternGeneratorAPI::test_jConsistencyOddLocales() { // ICU
     }
 }
 
+void IntlTestDateTimePatternGeneratorAPI::testBestPattern() {
+    // generic test for DateTimePatternGenerator::getBestPattern() that can be used to test multiple
+    // bugs in the resource data
+    static const char* testCases[] = {
+        // ICU-21650: (See the "week day" section of https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
+        // for a full explanation of why this is the desired behavior)
+        // if the user asks for E, the minimum field length is 3, but if he asks for c or e, it's 1
+        "en_US",      "E",           "ccc",
+        "en_US",      "c",           "c",
+        "en_US",      "e",           "c",
+        "en_US",      "EE",          "ccc",
+        "en_US",      "cc",          "cc",
+        "en_US",      "ee",          "cc",
+        "en_US",      "EEE",         "ccc",
+        "en_US",      "ccc",         "ccc",
+        "en_US",      "eee",         "ccc",
+        // and if the user asked for c or e and the field length is 1 or 2, the output pattern should contain
+        // e instead of E (e supports numeric abbreviations; E doesn't)
+        "en_US",      "yMEd",        "EEE, M/d/y",
+        "en_US",      "yMcd",        "e, M/d/y",
+        "en_US",      "yMed",        "e, M/d/y",
+        "en_US",      "yMMEEdd",     "EEE, MM/dd/y",
+        "en_US",      "yMMccdd",     "ee, MM/dd/y",
+        "en_US",      "yMMeedd",     "ee, MM/dd/y",
+        "en_US",      "yMMMEd",      "EEE, MMM d, y",
+        "en_US",      "yMMMcccd",    "EEE, MMM d, y",
+        "en_US",      "yMMMeeed",    "EEE, MMM d, y",
+        "en_US",      "yMMMMEEEEd",  "EEEE, MMMM d, y",
+        "en_US",      "yMMMMccccd",  "EEEE, MMMM d, y",
+        "en_US",      "yMMMMeeeed",  "EEEE, MMMM d, y",
+    };
+    
+    for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); i += 3) {
+        const char* localeID(testCases[i]);
+        const char* skeleton(testCases[i + 1]);
+        const char* expectedPattern(testCases[i + 2]);
+        
+        UErrorCode err = U_ZERO_ERROR;
+        UnicodeString actualPattern;
+        
+        if (uprv_strcmp(skeleton, "full") != 0) {
+            LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstance(localeID, err), err);
+            actualPattern = dtpg->getBestPattern(UnicodeString(skeleton), err);
+        } else {
+            LocalPointer<DateFormat> df(DateFormat::createDateInstance(DateFormat::kFull, localeID));
+            SimpleDateFormat* sdf = dynamic_cast<SimpleDateFormat*>(df.getAlias());
+            
+            if (sdf != NULL) {
+                sdf->toPattern(actualPattern);
+            }
+        }
+        
+        if (U_FAILURE(err)) {
+            errln("Failure for test case %s/%s: %s", localeID, skeleton, u_errorName(err));
+        } else {
+            char failureMessage[100];
+            strcpy(failureMessage, "Wrong result for test case ");
+            strcat(failureMessage, localeID);
+            strcat(failureMessage, "/");
+            strcat(failureMessage, skeleton);
+            assertEquals(failureMessage, UnicodeString(expectedPattern), actualPattern);
+        }
+    }
+}
+
 #endif /* #if !UCONFIG_NO_FORMATTING */
index b3fb02e2bd213a2fc8c606184cd79cb7288bfe42..b6412467439af0578321e3962eaf14eacff687e1 100644 (file)
@@ -37,6 +37,7 @@ private:
     void testFallbackWithDefaultRootLocale();
     void testGetDefaultHourCycle_OnEmptyInstance();
     void test_jConsistencyOddLocales();
+    void testBestPattern();
 };
 
 #endif /* #if !UCONFIG_NO_FORMATTING */
index 528eed80e01f099870e7856d934b307c9b3d0ddb..56add4cedd8a9da40901a3e82f141f5875619d96 100644 (file)
@@ -2200,7 +2200,11 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
                             (type == MINUTE && (options & MATCH_MINUTE_FIELD_LENGTH)==0) ||
                             (type == SECOND && (options & MATCH_SECOND_FIELD_LENGTH)==0) ) {
                         adjFieldLen = fieldBuilder.length();
-                    } else if (matcherWithSkeleton != null) {
+                    } else if (matcherWithSkeleton != null && reqFieldChar != 'c' && reqFieldChar != 'e') {
+                        // (we skip this section for 'c' and 'e' because unlike the other characters considered in this function,
+                        // they have no minimum field length-- 'E' and 'EE' are equivalent to 'EEE', but 'e' and 'ee' are not
+                        // equivalent to 'eee' -- see the entries for "week day" in
+                        // https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table for more info)
                         int skelFieldLen = matcherWithSkeleton.original.getFieldLength(type);
                         boolean patFieldIsNumeric = variableField.isNumeric();
                         boolean skelFieldIsNumeric = matcherWithSkeleton.fieldIsNumeric(type);
@@ -2217,6 +2221,12 @@ public class DateTimePatternGenerator implements Freezable<DateTimePatternGenera
                             && (type != YEAR || reqFieldChar=='Y'))
                             ? reqFieldChar
                             : fieldBuilder.charAt(0);
+                    if (c == 'E' && adjFieldLen < 3) {
+                        // see https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table:
+                        // If we want a numeric day-of-week field, we have to use 'e'-- 'E' doesn't support
+                            // numeric day-of-week abbreviations
+                        c = 'e';
+                    }
                     if (type == HOUR) {
                         // The adjustment here is required to match spec (https://www.unicode.org/reports/tr35/tr35-dates.html#dfst-hour).
                         // It is necessary to match the hour-cycle preferred by the Locale.
index 51c465dca3fc183d4f6ffc7eea14057296e31ed7..b979f3fd4df5b880b1cf990f17020f99dfbdf84a 100644 (file)
@@ -1793,4 +1793,61 @@ public class DateTimeGeneratorTest extends TestFmwk {
             }
         }
     }
+    
+    @Test
+    public void testBestPattern() {
+        // generic test for DateTimePatternGenerator::getBestPattern() that can be used to test multiple
+        // bugs in the resource data
+        String[] testCases = {
+            // ICU-21650: (See the "week day" section of https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
+            // for a full explanation of why this is the desired behavior)
+            // if the user asks for E, the minimum field length is 3, but if he asks for c or e, it's 1
+            "en_US",      "E",           "ccc",
+            "en_US",      "c",           "c",
+            "en_US",      "e",           "c",
+            "en_US",      "EE",          "ccc",
+            "en_US",      "cc",          "cc",
+            "en_US",      "ee",          "cc",
+            "en_US",      "EEE",         "ccc",
+            "en_US",      "ccc",         "ccc",
+            "en_US",      "eee",         "ccc",
+            // and if the user asked for c or e and the field length is 1 or 2, the output pattern should contain
+            // e instead of E (e supports numeric abbreviations; E doesn't)
+            "en_US",      "yMEd",        "EEE, M/d/y",
+            "en_US",      "yMcd",        "e, M/d/y",
+            "en_US",      "yMed",        "e, M/d/y",
+            "en_US",      "yMMEEdd",     "EEE, MM/dd/y",
+            "en_US",      "yMMccdd",     "ee, MM/dd/y",
+            "en_US",      "yMMeedd",     "ee, MM/dd/y",
+            "en_US",      "yMMMEd",      "EEE, MMM d, y",
+            "en_US",      "yMMMcccd",    "EEE, MMM d, y",
+            "en_US",      "yMMMeeed",    "EEE, MMM d, y",
+            "en_US",      "yMMMMEEEEd",  "EEEE, MMMM d, y",
+            "en_US",      "yMMMMccccd",  "EEEE, MMMM d, y",
+            "en_US",      "yMMMMeeeed",  "EEEE, MMMM d, y",
+        };
+    
+        for (int i = 0; i < testCases.length; i += 3) {
+            String localeID = testCases[i];
+            ULocale locale = new ULocale(localeID);
+            String skeleton = testCases[i + 1];
+            String expectedPattern = testCases[i + 2];
+            String actualPattern = null;
+        
+            if (!skeleton.equals("full")) {
+                DateTimePatternGenerator dtpg = DateTimePatternGenerator.getInstance(locale);
+                actualPattern = dtpg.getBestPattern(skeleton);
+            } else {
+                DateFormat df = DateFormat.getDateInstance(DateFormat.FULL, locale);
+                SimpleDateFormat sdf = (SimpleDateFormat)df;
+            
+                if (sdf != null) {
+                    actualPattern = sdf.toPattern();
+                }
+            }
+        
+            assertEquals("Wrong result for test case " + localeID + "/" + skeleton, expectedPattern, actualPattern);
+        }
+    }
+
 }