From 7c59127769b93cc5560cbc7a591a6b9f8d19cb3d Mon Sep 17 00:00:00 2001
From: Shane Carr <shane@unicode.org>
Date: Thu, 26 Oct 2017 21:53:50 +0000
Subject: [PATCH] ICU-13309 Changing number parsing to accept only horizontal
 whitespace, not vertical whitespace or control characters.

X-SVN-Rev: 40646
---
 .../src/com/ibm/icu/impl/number/Parse.java    |  5 ++-
 .../src/com/ibm/icu/text/DecimalFormat.java   |  4 +-
 .../src/com/ibm/icu/text/NumberFormat.java    |  3 ++
 .../icu/dev/test/format/NumberFormatTest.java | 38 ++++++++++++++++++-
 4 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java
index 79bc9e03874..50d0f3c70ee 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java
@@ -132,9 +132,10 @@ public class Parse {
     INSIDE_AFFIX_PATTERN;
   }
 
-  // TODO: Does this set make sense for the whitespace characters?
+  // This set was decided after discussion with icu-design@. See ticket #13309.
+  // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
   private static final UnicodeSet UNISET_WHITESPACE =
-      new UnicodeSet("[[:whitespace:][\\u2000-\\u200D]]").freeze();
+      new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
 
   // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
   private static final UnicodeSet UNISET_BIDI =
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java
index 5772a08c56f..73bd53415ff 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java
@@ -202,7 +202,9 @@ import com.ibm.icu.util.ULocale.Category;
  * pattern string and the input string. For example, the pattern "# %" matches "35 %" (with a single
  * space), "35%" (with no space), "35&nbsp;%" (with a non-breaking space), and "35&nbsp; %" (with
  * multiple spaces). Arbitrary ignorables are also allowed at boundaries between the parts of the
- * number: prefix, number, exponent separator, and suffix.
+ * number: prefix, number, exponent separator, and suffix. Ignorable whitespace characters are those
+ * having the Unicode "blank" property for regular expressions, defined in UTS #18 Annex C, which is
+ * "horizontal" whitespace, like spaces and tabs, but not "vertical" whitespace, like line breaks.
  *
  * <p>If {@link #parse(String, ParsePosition)} fails to parse a string, it returns <code>null</code>
  * and leaves the parse position unchanged. The convenience method {@link #parse(String)} indicates
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NumberFormat.java
index dab77744d63..e97ca196dbf 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/NumberFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NumberFormat.java
@@ -424,6 +424,9 @@ public abstract class NumberFormat extends UFormat {
      * <p>Does not throw an exception; if no object can be parsed, index is
      * unchanged!
      *
+     * <p>For more detail on parsing, see the "Parsing" header in the class
+     * documentation of {@link DecimalFormat}.
+     *
      * @see #isParseIntegerOnly
      * @see DecimalFormat#setParseBigDecimal
      * @see java.text.Format#parseObject(String, ParsePosition)
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
index 102ca85a07a..aabb7ce3ae0 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
@@ -63,6 +63,7 @@ import com.ibm.icu.text.NumberFormat.SimpleNumberFormatFactory;
 import com.ibm.icu.text.NumberingSystem;
 import com.ibm.icu.text.PluralRules;
 import com.ibm.icu.text.RuleBasedNumberFormat;
+import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.Currency;
 import com.ibm.icu.util.Currency.CurrencyUsage;
 import com.ibm.icu.util.CurrencyAmount;
@@ -438,8 +439,8 @@ public class NumberFormatTest extends TestFmwk {
                 {" $ 124 ", "6", "-1"},
                 {"124$", "3", "-1"},
                 {"124 $", "3", "-1"},
-                {"$124\u200D", "4", "-1"},
-                {"$\u200D124", "5", "-1"},
+                {"$124\u200A", "4", "-1"},
+                {"$\u200A124", "5", "-1"},
         };
         NumberFormat foo = NumberFormat.getCurrencyInstance();
         for (int i = 0; i < DATA.length; ++i) {
@@ -1712,6 +1713,29 @@ public class NumberFormatTest extends TestFmwk {
         expect(fmt, "ab  1234", n);
         expect(fmt, "a b1234", n);
         expect(fmt, "a   b1234", n);
+        expect(fmt, " a b 1234", n);
+
+        // Horizontal whitespace is allowed, but not vertical whitespace.
+        expect(fmt, "\ta\u00A0b\u20001234", n);
+        expect(fmt, "a   \u200A    b1234", n);
+        expectParseException(fmt, "\nab1234", n);
+        expectParseException(fmt, "a    \n   b1234", n);
+        expectParseException(fmt, "a    \u0085   b1234", n);
+        expectParseException(fmt, "a    \u2028   b1234", n);
+
+        // Test all characters in the UTS 18 "blank" set stated in the API docstring.
+        UnicodeSet blanks = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
+        for (String space : blanks) {
+            String str = "a  " + space + "  b1234";
+            expect(fmt, str, n);
+        }
+
+        // Test that all other whitespace characters (e.g. line breaks) are rejected by the parser.
+        UnicodeSet otherWhitespace = new UnicodeSet("[[:whitespace:]]").removeAll(blanks).freeze();
+        for (String space : otherWhitespace) {
+            String str = "a  " + space + "  b1234";
+            expectParseException(fmt, str, n);
+        }
     }
 
     /**
@@ -2676,6 +2700,16 @@ public class NumberFormatTest extends TestFmwk {
         expect(fmt, str, new Long(n));
     }
 
+    /** Verifies that {@code fmt} rejects {@code str}; {@code n} is used only in the failure message. */
+    public void expectParseException(DecimalFormat fmt, String str, Number n) {
+        try {
+            Number num = fmt.parse(str);
+            errln("Expected failure, but passed: " + n + " on " + fmt.toPattern() + " for \"" + str + "\" -> " + num);
+        } catch (ParseException expected) {
+            // Expected: rejecting the input is the passing outcome, so there is nothing to report.
+        }
+    }
+
     private void expectCurrency(NumberFormat nf, Currency curr,
             double value, String string) {
         DecimalFormat fmt = (DecimalFormat) nf;
-- 
2.40.0