From: Yoshito Umaoka Date: Wed, 4 Oct 2017 14:34:54 +0000 (+0000) Subject: ICU-13366 Fixed ICU4J number parsing problems with supplementary characters in SimpleD... X-Git-Tag: release-60-rc~57 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=25ee7556ddc9f8e6c89bb8a38bc260d4e268c389;p=icu ICU-13366 Fixed ICU4J number parsing problems with supplementary characters in SimpleDateFormat and TimeZoneFormat. X-SVN-Rev: 40544 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java index af077d5e899..2baa01816cc 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/DateNumberFormat.java @@ -45,6 +45,9 @@ public final class DateNumberFormat extends NumberFormat { private int minIntDigits; public DateNumberFormat(ULocale loc, String digitString, String nsName) { + if (digitString.length() > 10) { + throw new UnsupportedOperationException("DateNumberFormat does not support digits out of BMP."); + } initialize(loc,digitString,nsName); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java index 8e87423c6d3..47ffa5de9f7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java @@ -1571,12 +1571,20 @@ public abstract class DateFormat extends UFormat { */ public void setNumberFormat(NumberFormat newNumberFormat) { - this.numberFormat = (NumberFormat)newNumberFormat.clone(); - /*In order to parse String like "11.10.2001" to DateTime correctly - in Locale("fr","CH") [Richard/GCL] - */ - this.numberFormat.setParseIntegerOnly(true); - this.numberFormat.setGroupingUsed(false); + numberFormat = (NumberFormat)newNumberFormat.clone(); + fixNumberFormatForDates(numberFormat); + } + + // no matter what the locale's default number format 
looked like, we want + // to modify it so that it doesn't use thousands separators, doesn't always + // show the decimal point, and recognizes integers only when parsing + static void fixNumberFormatForDates(NumberFormat nf) { + nf.setGroupingUsed(false); + if (nf instanceof DecimalFormat) { + ((DecimalFormat)nf).setDecimalSeparatorAlwaysShown(false); + } + nf.setParseIntegerOnly(true); + nf.setMinimumFractionDigits(0); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java index b5d63896d18..5c81eba1e1f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java @@ -1115,15 +1115,20 @@ public class SimpleDateFormat extends DateFormat { } if (numberFormat == null) { NumberingSystem ns = NumberingSystem.getInstance(locale); - if (ns.isAlgorithmic()) { + String digitString = ns.getDescription(); + // DateNumberFormat does not support non-BMP digits at this moment. + if (ns.isAlgorithmic() || digitString.length() != 10) { numberFormat = NumberFormat.getInstance(locale); } else { - String digitString = ns.getDescription(); String nsName = ns.getName(); // Use a NumberFormat optimized for date formatting numberFormat = new DateNumberFormat(locale, digitString, nsName); } } + if (numberFormat instanceof DecimalFormat) { + fixNumberFormatForDates(numberFormat); + } + // Note: deferring calendar calculation until when we really need it. // Instead, we just record time of construction for backward compatibility. 
defaultCenturyBase = System.currentTimeMillis(); @@ -1151,7 +1156,14 @@ public class SimpleDateFormat extends DateFormat { String digits = null; if (numberFormat instanceof DecimalFormat) { DecimalFormatSymbols decsym = ((DecimalFormat) numberFormat).getDecimalFormatSymbols(); - digits = new String(decsym.getDigits()); + String[] strDigits = decsym.getDigitStrings(); + // Note: TimeZoneFormat#setGMTOffsetDigits() does not support string array, + // so we need to concatenate digits to make a single string. + StringBuilder digitsBuf = new StringBuilder(); + for (String digit : strDigits) { + digitsBuf.append(digit); + } + digits = digitsBuf.toString(); } else if (numberFormat instanceof DateNumberFormat) { digits = new String(((DateNumberFormat)numberFormat).getDigits()); } @@ -2236,8 +2248,17 @@ public class SimpleDateFormat extends DateFormat { */ private void initLocalZeroPaddingNumberFormat() { if (numberFormat instanceof DecimalFormat) { - decDigits = ((DecimalFormat)numberFormat).getDecimalFormatSymbols().getDigits(); + DecimalFormatSymbols tmpDecfs = ((DecimalFormat)numberFormat).getDecimalFormatSymbols(); + String[] tmpDigits = tmpDecfs.getDigitStringsLocal(); useLocalZeroPaddingNumberFormat = true; + decDigits = new char[10]; + for (int i = 0; i < 10; i++) { + if (tmpDigits[i].length() > 1) { + useLocalZeroPaddingNumberFormat = false; + break; + } + decDigits[i] = tmpDigits[i].charAt(0); + } } else if (numberFormat instanceof DateNumberFormat) { decDigits = ((DateNumberFormat)numberFormat).getDigits(); useLocalZeroPaddingNumberFormat = true; @@ -3226,10 +3247,8 @@ public class SimpleDateFormat extends DateFormat { /* Skip this for Chinese calendar, moved from ChineseDateFormat */ if ( override != null && (override.compareTo("hebr") == 0 || override.indexOf("y=hebr") >= 0) && value < 1000 ) { value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR; - } else if (count == 2 && (pos.getIndex() - start) == 2 && cal.haveDefaultCentury() - && 
UCharacter.isDigit(text.charAt(start)) - && UCharacter.isDigit(text.charAt(start+1))) - { + } else if (count == 2 && text.codePointCount(start, pos.getIndex()) == 2 && cal.haveDefaultCentury() + && countDigits(text, start, pos.getIndex()) == 2) { // Assume for example that the defaultCenturyStart is 6/18/1903. // This means that two-digit years will be forced into the range // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02 @@ -3242,7 +3261,7 @@ public class SimpleDateFormat extends DateFormat { ambiguousYear[0] = value == ambiguousTwoDigitYear; value += (getDefaultCenturyStartYear()/100)*100 + (value < ambiguousTwoDigitYear ? 100 : 0); - } + } cal.set(field, value); // Delayed checking for adjustment of Hebrew month numbers in non-leap years. @@ -3322,7 +3341,7 @@ public class SimpleDateFormat extends DateFormat { return pos.getIndex(); case 8: // 'S' - FRACTIONAL_SECOND // Fractional seconds left-justify - i = pos.getIndex() - start; + i = countDigits(text, start, pos.getIndex()); if (i < 3) { while (i < 3) { value *= 10; @@ -3788,6 +3807,26 @@ public class SimpleDateFormat extends DateFormat { return number; } + /** + * Counts number of digit code points in the specified text. + * + * @param text input text + * @param start start index, inclusive + * @param end end index, exclusive + * @return number of digits found in the text in the specified range. 
+ */ + private static int countDigits(String text, int start, int end) { + int numDigits = 0; + int idx = start; + while (idx < end) { + int cp = text.codePointAt(idx); + if (UCharacter.isDigit(cp)) { + numDigits++; + } + idx += UCharacter.charCount(cp); + } + return numDigits; + } /** * Translate a pattern, mapping each character in the from string to the diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormat.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormat.java index ec36fc1e454..8dc9c321933 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormat.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDateFormat.java @@ -77,7 +77,7 @@ public class IntlTestDateFormat extends TestFmwk { fLimit = 3; for(timeStyle = 0; timeStyle < 4; timeStyle++) { - fTestName = new String("Time test " + timeStyle + " (" + localeName + ")"); + fTestName = "Time test " + timeStyle + " (" + localeName + ")"; try { fFormat = DateFormat.getTimeInstance(timeStyle, locale); } @@ -85,13 +85,13 @@ public class IntlTestDateFormat extends TestFmwk { errln("FAIL: localeTest time getTimeInstance exception"); throw e; } - TestFormat(); + testDates(); } fLimit = 2; for(dateStyle = 0; dateStyle < 4; dateStyle++) { - fTestName = new String("Date test " + dateStyle + " (" + localeName + ")"); + fTestName = "Date test " + dateStyle + " (" + localeName + ")"; try { fFormat = DateFormat.getDateInstance(dateStyle, locale); } @@ -99,12 +99,12 @@ public class IntlTestDateFormat extends TestFmwk { errln("FAIL: localeTest date getTimeInstance exception"); throw e; } - TestFormat(); + testDates(); } for(dateStyle = 0; dateStyle < 4; dateStyle++) { for(timeStyle = 0; timeStyle < 4; timeStyle++) { - fTestName = new String("DateTime test " + dateStyle + "/" + timeStyle + " (" + localeName + ")"); + fTestName = "DateTime test " + dateStyle + "/" + timeStyle + " (" + localeName + ")"; try { fFormat = 
DateFormat.getDateTimeInstance(dateStyle, timeStyle, locale); } @@ -112,13 +112,12 @@ public class IntlTestDateFormat extends TestFmwk { errln("FAIL: localeTest date/time getDateTimeInstance exception"); throw e; } - TestFormat(); + testDates(); } } } - @Test - public void TestFormat() { + private void testDates() { if (fFormat == null) { errln("FAIL: DateFormat creation failed"); return; @@ -259,6 +258,7 @@ public class IntlTestDateFormat extends TestFmwk { new ULocale("bg_BG"), new ULocale("fr_CA"), new ULocale("zh_TW"), + new ULocale("ccp"), // decimal digits are not in BMP }; } else { locales = DateFormat.getAvailableULocales(); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/TimeZoneFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/TimeZoneFormatTest.java index e3719cfd971..82b221853cf 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/TimeZoneFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/TimeZoneFormatTest.java @@ -128,7 +128,8 @@ public class TimeZoneFormatTest extends TestFmwk { if (TEST_ALL || TestFmwk.getExhaustiveness() > 5) { LOCALES = ULocale.getAvailableLocales(); } else { - LOCALES = new ULocale[] {new ULocale("en"), new ULocale("en_CA"), new ULocale("fr"), new ULocale("zh_Hant"), new ULocale("fa")}; + LOCALES = new ULocale[] {new ULocale("en"), new ULocale("en_CA"), new ULocale("fr"), + new ULocale("zh_Hant"), new ULocale("fa"), new ULocale("ccp")}; } String[] tzids; @@ -245,10 +246,13 @@ public class TimeZoneFormatTest extends TestFmwk { if (!isOffsetFormat) { // Check if localized GMT format is used as a fallback of name styles int numDigits = 0; - for (int n = 0; n < tzstr.length(); n++) { - if (UCharacter.isDigit(tzstr.charAt(n))) { + int idx = 0; + while (idx < tzstr.length()) { + int cp = tzstr.codePointAt(idx); + if (UCharacter.isDigit(cp)) { numDigits++; } + idx += UCharacter.charCount(cp); } isOffsetFormat = (numDigits > 0); }