From bfbffd7805efd6e30ba79f6d762b34bf645c6955 Mon Sep 17 00:00:00 2001 From: Scott Russell Date: Mon, 16 Sep 2013 12:20:50 +0000 Subject: [PATCH] ICU-10261 give DateFormat more granular leniency control X-SVN-Rev: 34326 --- .../core/src/com/ibm/icu/text/DateFormat.java | 64 ++++++++++++++++++- .../com/ibm/icu/text/SimpleDateFormat.java | 12 ++-- .../icu/dev/test/format/DateFormatTest.java | 61 +++++++++++++++++- 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java index 0968866f218..5f079a97dde 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DateFormat.java @@ -11,6 +11,7 @@ import java.text.Format; import java.text.ParseException; import java.text.ParsePosition; import java.util.Date; +import java.util.EnumSet; import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -444,6 +445,28 @@ public abstract class DateFormat extends UFormat { public final static int FIELD_COUNT = 34; // must == DateFormatSymbols.patternChars.length() + + /** + * boolean attributes + *
+ * PARSE_ALLOW_WHITESPACE - indicates whitespace tolerance. Also included is trailing dot tolerance. + *
+ * PARSE_ALLOW_NUMERIC - indicates tolerance of numeric data when String data may be assumed. e.g. YEAR_NAME_FIELD + * + * @internal ICU 5.2 technology preview + */ + public enum BooleanAttribute { + /** indicates whitespace tolerance. Also included is trailing dot tolerance. */ + PARSE_ALLOW_WHITESPACE, + /** indicates tolerance of numeric data when String data may be assumed. e.g. YEAR_NAME_FIELD */ + PARSE_ALLOW_NUMERIC + }; + + /** + * boolean attributes for this instance. Inclusion in this set indicates a true condition. + */ + private EnumSet booleanAttributes = EnumSet.allOf(BooleanAttribute.class); + // Proclaim serial compatibility with 1.1 FCS private static final long serialVersionUID = 7218322306649953788L; @@ -1434,8 +1457,12 @@ public abstract class DateFormat extends UFormat { * lenient parsing, the parser may use heuristics to interpret inputs that * do not precisely match this object's format. With strict parsing, * inputs must match this object's format. + *

+ * Note: This method is specific to the encapsulated Calendar object. DateFormat + * leniency aspects are controlled by setBooleanAttribute. * @param lenient when true, parsing is lenient * @see com.ibm.icu.util.Calendar#setLenient + * @see #setBooleanAttribute(BooleanAttribute, boolean) * @stable ICU 2.0 */ public void setLenient(boolean lenient) @@ -1444,7 +1471,7 @@ public abstract class DateFormat extends UFormat { } /** - * Returns whether date/time parsing is lenient. + * Returns whether date/time parsing in the encapsulated Calendar object is lenient. * @stable ICU 2.0 */ public boolean isLenient() @@ -1452,6 +1479,41 @@ public abstract class DateFormat extends UFormat { return calendar.isLenient(); } + /** + * Sets a boolean attribute for this instance. Aspects of DateFormat leniency are controlled by + * boolean attributes. + * + * @see BooleanAttribute + * @internal ICU 5.2 technology preview + */ + public DateFormat setBooleanAttribute(BooleanAttribute key, boolean value) + { + if(booleanAttributes.contains(key) && value == false) + booleanAttributes.remove(key); + + if(value == true && !booleanAttributes.contains(key)) + booleanAttributes.add(key); + + return this; + } + + /** + * Gets the current value of the specified BooleanAttribute for this instance. + * + * If the attribute is missing, false is returned. + * + * @see BooleanAttribute + * @internal ICU 5.2 technology preview + */ + public boolean getBooleanAttribute(BooleanAttribute key) + { + if(booleanAttributes.contains(key)) + return true; + else + return false; + } + + /** * Overrides hashCode. 
* @stable ICU 2.0 diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java index fa556f65c5d..38b6ce24bd2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SimpleDateFormat.java @@ -2211,7 +2211,7 @@ public class SimpleDateFormat extends DateFormat { // Special hack for trailing "." after non-numeric field. if (pos < text.length()) { char extra = text.charAt(pos); - if (extra == '.' && isLenient() && items.length != 0) { + if (extra == '.' && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE) && items.length != 0) { // only do if the last field is not numeric Object lastItem = items[items.length - 1]; if (lastItem instanceof PatternItem && !((PatternItem)lastItem).isNumeric) { @@ -2426,7 +2426,7 @@ public class SimpleDateFormat extends DateFormat { ++pos; } } else if (pch != ich) { - if (ich == '.' && pos == originalPos && 0 < itemIndex && isLenient()) { + if (ich == '.' 
&& pos == originalPos && 0 < itemIndex && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE)) { Object before = items[itemIndex-1]; if (before instanceof PatternItem) { boolean isNumeric = ((PatternItem) before).isNumeric; @@ -2435,6 +2435,9 @@ public class SimpleDateFormat extends DateFormat { continue; } } + } else if ((pch == ' ' || pch == '.') && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE)) { + ++idx; + continue; } break; } @@ -2442,7 +2445,7 @@ public class SimpleDateFormat extends DateFormat { ++pos; } complete[0] = idx == plen; - if (complete[0] == false && isLenient() && 0 < itemIndex && itemIndex < items.length - 1) { + if (complete[0] == false && getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE) && 0 < itemIndex && itemIndex < items.length - 1) { // If fully lenient, accept " "* for any text between a date and a time field // We don't go more lenient, because we don't want to accept "12/31" for "12:31". // People may be trying to parse for a date, then for a time. 
@@ -2687,7 +2690,6 @@ public class SimpleDateFormat extends DateFormat { int value = 0; int i; ParsePosition pos = new ParsePosition(0); - boolean lenient = isLenient(); //int patternCharIndex = DateFormatSymbols.patternChars.indexOf(ch);c int patternCharIndex = -1; @@ -2843,7 +2845,7 @@ public class SimpleDateFormat extends DateFormat { return newStart; } } - if ( number != null && (lenient || formatData.shortYearNames == null || value > formatData.shortYearNames.length) ) { + if ( number != null && (getBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_NUMERIC) || formatData.shortYearNames == null || value > formatData.shortYearNames.length) ) { cal.set(Calendar.YEAR, value); return pos.getIndex(); } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java index f30b2dd4680..72f3169fc39 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/DateFormatTest.java @@ -4274,7 +4274,66 @@ public class DateFormatTest extends com.ibm.icu.dev.test.TestFmwk { return ok; } -} + public void TestDateFormatLeniency() { + // For details see http://bugs.icu-project.org/trac/ticket/10261 + + class TestDateFormatLeniencyItem { + public boolean leniency; + public String parseString; + public String pattern; + public String expectedResult; // null indicates expected error + // Simple constructor + public TestDateFormatLeniencyItem(boolean len, String parString, String patt, String expResult) { + leniency = len; + pattern = patt; + parseString = parString; + expectedResult = expResult; + } + }; + final TestDateFormatLeniencyItem[] items = { + // leniency parse String pattern expected result + new TestDateFormatLeniencyItem(true, "2008-Jan 02", "yyyy-LLL. dd", "2008-Jan. 02"), + new TestDateFormatLeniencyItem(false, "2008-Jan 03", "yyyy-LLL. 
dd", null), + new TestDateFormatLeniencyItem(true, "2008-Jan--04", "yyyy-MMM' -- 'dd", "2008-Jan -- 04"), + new TestDateFormatLeniencyItem(false, "2008-Jan--05", "yyyy-MMM' -- 'dd", null), + new TestDateFormatLeniencyItem(true, "2008-12-31", "yyyy-mm-dd", "2008-12-31") + }; + StringBuffer result = new StringBuffer(); + Date d = new Date(); + Calendar cal = GregorianCalendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.US); + SimpleDateFormat sdfmt = new SimpleDateFormat(); + ParsePosition p = new ParsePosition(0); + for (TestDateFormatLeniencyItem item: items) { + cal.clear(); + sdfmt.setCalendar(cal); + sdfmt.applyPattern(item.pattern); + sdfmt.setLenient(item.leniency); + sdfmt.setBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_WHITESPACE, item.leniency); + sdfmt.setBooleanAttribute(DateFormat.BooleanAttribute.PARSE_ALLOW_NUMERIC, item.leniency); + result.setLength(0); + p.setIndex(0); + p.setErrorIndex(-1); + d = sdfmt.parse(item.parseString, p); + if(item.expectedResult == null) { + if(p.getErrorIndex() != -1) + continue; + else + errln("error: unexpected parse success..."+item.parseString + " w/ lenient="+item.leniency+" should have faile"); + } + if(p.getErrorIndex() != -1) { + errln("error: parse error for string " +item.parseString + " -- idx["+p.getIndex()+"] errIdx["+p.getErrorIndex()+"]"); + continue; + } + cal.setTime(d); + result = sdfmt.format(cal, result, new FieldPosition(0)); + if(!result.toString().equalsIgnoreCase(item.expectedResult)) { + errln("error: unexpected format result. expected - " + item.expectedResult + " but result was - " + result); + } else { + logln("formatted results match! - " + result.toString()); + } + } + } +} -- 2.40.0