ICU-13569 refresh dev branch from trunk.

author Andy Heninger <andy.heninger@gmail.com>

Wed, 14 Feb 2018 23:55:39 +0000 (23:55 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Wed, 14 Feb 2018 23:55:39 +0000 (23:55 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Wed, 14 Feb 2018 23:55:39 +0000 (23:55 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Wed, 14 Feb 2018 23:55:39 +0000 (23:55 +0000)
diff --git a/.gitattributes b/.gitattributes

index d10d0ec8c2603eb2b40f0755e115708f85644ef4..fb59db4eca523fdd24b523408c528dc823359d18 100644 (file)
--- a/.gitattributes
+++ b/.gitattributes
@@ -49,7 +49,6 @@ README text !eol
  *.tri2 -text
  
  icu4c/icu4c.css -text
-icu4c/packaging/distrelease.ps1 -text
  icu4c/source/aclocal.m4 -text
  icu4c/source/config/m4/icu-conditional.m4 -text
  icu4c/source/data/curr/pool.res -text
diff --git a/.gitignore b/.gitignore

index 1d2af48c7148afd33d582574a0934a3bb67d01ee..95cc42712f9051c40e6f093179c731cdfa953024 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -635,6 +635,8 @@ icu4c/source/tools/ctestfw/libsicutest*
  icu4c/source/tools/ctestfw/release
  icu4c/source/tools/ctestfw/x64
  icu4c/source/tools/ctestfw/x86
+icu4c/source/tools/escapesrc/*.d
+icu4c/source/tools/escapesrc/Makefile
  icu4c/source/tools/genbrk/*.d
  icu4c/source/tools/genbrk/*.o
  icu4c/source/tools/genbrk/*.pdb
diff --git a/icu4c/LICENSE b/icu4c/LICENSE

index c84076cd072b806782999d6f4672df93cac1d3c6..923219da903827d9ed9189df68f3b363c84e3721 100644 (file)
--- a/icu4c/LICENSE
+++ b/icu4c/LICENSE
@@ -383,3 +383,32 @@ Database section 7.
   #    by ICANN or the IETF Trust on the database or the code.  Any person
   #    making a contribution to the database or code waives all rights to
   #    future claims in that contribution or in the TZ Database.
+
+6. Google double-conversion
+
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of Google Inc. nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/icu4c/packaging/distrelease.ps1 b/icu4c/packaging/distrelease.ps1

index 34e54bc28bc1855ff4bb809c1dbca61d84aeb050..41cd9ee4a28f2e458f2d2933e804b229ab9e9a61 100644 (file)
--- a/icu4c/packaging/distrelease.ps1
+++ b/icu4c/packaging/distrelease.ps1
@@ -1,53 +1,53 @@
-# Copyright (C) 2016 and later: Unicode, Inc. and others.\r
-# License & terms of use: http://www.unicode.org/copyright.html\r
-#-------------------------\r
-# Script: icu\packaging\distrelease.ps1\r
-# Author: Steven R. Loomis\r
-# Date: 2017-04-14\r
-#-------------------------\r
-#\r
-# This builds a zipfile containing the *64 bit* Windows binary\r
-#\r
-# Usage: (after building ICU using MSVC) \r
-#  (bring up Powershell ISE)\r
-#    cd C:\icu\icu4c\\r
-#    Set-ExecutionPolicy -Scope Process AllSigned\r
-#    .\packaging\distrelease.ps1\r
-#\r
-# Will emit: c:\icu4c\icu\source\dist\icu-windows.zip\r
-#\r
-#\r
-# You will get warnings from the execution policy and the script itself.\r
-#  see https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core \r
-#    for more about execution policies.\r
-\r
-\r
-$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent\r
-$icuDir = Resolve-Path -Path '$icuDir\..'\r
-\r
-echo  $icuDir\r
-\r
-# ok, create some work areas\r
-New-Item -Path "$icuDir\source\dist" -ErrorAction SilentlyContinue -ItemType "directory"\r
-$source = "$icuDir\source\dist\icu"\r
-Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse\r
-New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue\r
-\r
-# copy required stuff\r
-Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\include" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\readme.html" -Destination $source -Recurse\r
-\r
-\r
-$destination = "$icuDir\source\dist\icu-windows.zip"\r
-Remove-Item -Path $destination -ErrorAction Continue\r
-Add-Type -assembly "system.io.compression.filesystem"\r
-Echo $source\r
-Echo $destination\r
-[io.compression.zipfile]::CreateFromDirectory($source, $destination)\r
-\r
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+#-------------------------
+# Script: icu4c\packaging\distrelease.ps1
+# Author: Steven R. Loomis
+# Date: 2017-04-14
+#-------------------------
+#
+# This builds a zipfile containing the *64 bit* Windows binary
+#
+# Usage: (after building ICU using MSVC) 
+#  (bring up Powershell ISE)
+#    cd C:\icu\icu4c\
+#    Set-ExecutionPolicy -Scope Process AllSigned
+#    .\packaging\distrelease.ps1
+#
+# Will emit: C:\icu\icu4c\source\dist\icu-windows.zip
+#
+#
+# You will get warnings from the execution policy and the script itself.
+#  see https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core 
+#    for more about execution policies.
+
+
+$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
+$icuDir = Resolve-Path -Path "$icuDir\.."  # double quotes: single quotes would not expand $icuDir
+
+echo  $icuDir
+
+# Create the dist directory and an empty staging directory (dist\icu) to be zipped.
+New-Item -Path "$icuDir\source\dist" -ErrorAction SilentlyContinue -ItemType "directory"
+$source = "$icuDir\source\dist\icu"
+Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse
+New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue
+
+# Copy the 64-bit libraries and binaries, the headers, and the docs into staging.
+Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\include" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\readme.html" -Destination $source -Recurse
+
+
+$destination = "$icuDir\source\dist\icu-windows.zip"
+Remove-Item -Path $destination -ErrorAction Continue
+Add-Type -assembly "system.io.compression.filesystem"
+Echo $source
+Echo $destination
+[io.compression.zipfile]::CreateFromDirectory($source, $destination)
+
  echo $destination
 \ No newline at end of file
diff --git a/icu4c/source/common/ucnv_u8.cpp b/icu4c/source/common/ucnv_u8.cpp

index 094e2dfb6f43279885dde600400da5cc2af10dfe..7089d9400c6e95a496162d4bbfcd7d6ab50a9b89 100644 (file)
--- a/icu4c/source/common/ucnv_u8.cpp
+++ b/icu4c/source/common/ucnv_u8.cpp
@@ -696,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
          // Use a single counter for source and target, counting the minimum of
          // the source length and the target capacity.
          // Let the standard converter handle edge cases.
-        const uint8_t *limit=sourceLimit;
          if(count>targetCapacity) {
-            limit-=(count-targetCapacity);
              count=targetCapacity;
          }
  
-        // The conversion loop checks count>0 only once per 1/2/3-byte character.
-        // If the buffer ends with a truncated 2- or 3-byte sequence,
+        // The conversion loop checks count>0 only once per character.
+        // If the buffer ends with a truncated sequence,
          // then we reduce the count to stop before that,
          // and collect the remaining bytes after the conversion loop.
-        {
-            // Do not go back into the bytes that will be read for finishing a partial
-            // sequence from the previous buffer.
-            int32_t length=count-toULimit;
-            if(length>0) {
-                uint8_t b1=*(limit-1);
-                if(U8_IS_SINGLE(b1)) {
-                    // common ASCII character
-                } else if(U8_IS_TRAIL(b1) && length>=2) {
-                    uint8_t b2=*(limit-2);
-                    if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
-                        // truncated 3-byte sequence
-                        count-=2;
-                    }
-                } else if(0xc2<=b1 && b1<0xf0) {
-                    // truncated 2- or 3-byte sequence
-                    --count;
-                }
-            }
-        }
+
+        // Do not go back into the bytes that will be read for finishing a partial
+        // sequence from the previous buffer.
+        int32_t length=count-toULimit;
+        U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
+        count=toULimit+length;
      }
  
      if(c!=0) {
@@ -815,7 +799,7 @@ moreBytes:
              }
  
              /* copy the legal byte sequence to the target */
-            if(count>=toULength) {
+            {
                  int8_t i;
  
                  for(i=0; i<oldToULength; ++i) {
@@ -826,14 +810,6 @@ moreBytes:
                      *target++=*source++;
                  }
                  count-=toULength;
-            } else {
-                // A supplementary character that does not fit into the target.
-                // Let the standard converter handle this.
-                source-=(toULength-oldToULength);
-                pToUArgs->source=(char *)source;
-                pFromUArgs->target=(char *)target;
-                *pErrorCode=U_USING_DEFAULT_WARNING;
-                return;
              }
          }
      }
@@ -857,8 +833,7 @@ moreBytes:
                          utf8->toULength=toULength;
                          utf8->mode=toULimit;
                          break;
-                    } else if(!U8_IS_TRAIL(b=*source)) {
-                        /* lead byte in trail byte position */
+                    } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
                          utf8->toULength=toULength;
                          *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                          break;
diff --git a/icu4c/source/common/unicode/platform.h b/icu4c/source/common/unicode/platform.h

index f220b1fc34d8e2a90771b49aeee4b2290bf8bfca..217de4a1801d408289ade0da6c1f409cb06acbea 100644 (file)
--- a/icu4c/source/common/unicode/platform.h
+++ b/icu4c/source/common/unicode/platform.h
@@ -631,7 +631,7 @@ namespace std {
   */
  #ifdef U_CHARSET_IS_UTF8
      /* Use the predefined value. */
-#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
+#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
  #   define U_CHARSET_IS_UTF8 1
  #else
  #   define U_CHARSET_IS_UTF8 0
diff --git a/icu4c/source/common/unicode/utf8.h b/icu4c/source/common/unicode/utf8.h

index 021fdcf1f244e4fdaf9f392c6cc43dbd9166925d..8ccc7dfebab2514d40b5876d430c5e049ab56268 100644 (file)
--- a/icu4c/source/common/unicode/utf8.h
+++ b/icu4c/source/common/unicode/utf8.h
@@ -380,7 +380,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  #define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
      (c)=(uint8_t)(s)[(i)++]; \
      if(!U8_IS_SINGLE(c)) { \
-        uint8_t __t; \
+        uint8_t __t = 0; \
          if((i)!=(length) && \
              /* fetch/validate/assemble all but last trail byte */ \
              ((c)>=0xe0 ? \
@@ -592,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
   * If the offset points to a UTF-8 trail byte,
   * then the offset is moved backward to the corresponding lead byte.
   * Otherwise, it is not modified.
+ *
   * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
   *
   * @param s const uint8_t * string
   * @param start int32_t starting string offset (usually 0)
   * @param i int32_t string offset, must be start<=i
   * @see U8_SET_CP_START_UNSAFE
+ * @see U8_TRUNCATE_IF_INCOMPLETE
   * @stable ICU 2.4
   */
  #define U8_SET_CP_START(s, start, i) { \
@@ -606,6 +609,52 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
      } \
  }
  
+/**
+ * If the string ends with a UTF-8 byte sequence that is valid so far
+ * but incomplete, then reduce the length of the string to end before
+ * the lead byte of that incomplete sequence.
+ * For example, if the string ends with E1 80, the length is reduced by 2.
+ *
+ * Useful for processing text split across multiple buffers
+ * (save the incomplete sequence for later)
+ * and for optimizing iteration
+ * (check for string length only once per character).
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_SET_CP_START(), this macro never reads s[length].
+ *
+ * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param length int32_t string length, must be start<=length; modified in place, so it must be an lvalue
+ * @see U8_SET_CP_START
+ * @draft ICU 61
+ */
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) { \
+    if((length)>(start)) { \
+        uint8_t __b1=(s)[(length)-1]; \
+        if(U8_IS_SINGLE(__b1)) { \
+            /* common ASCII character */ \
+        } else if(U8_IS_LEAD(__b1)) { \
+            --(length); \
+        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+            uint8_t __b2=(s)[(length)-2]; \
+            if(0xe0<=__b2 && __b2<=0xf4) { \
+                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+                    (length)-=2; \
+                } \
+            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+                uint8_t __b3=(s)[(length)-3]; \
+                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+                    (length)-=3; \
+                } \
+            } \
+        } \
+    } \
+}
+
+
  /* definitions with backward iteration -------------------------------------- */
  
  /**
diff --git a/icu4c/source/common/utf_impl.cpp b/icu4c/source/common/utf_impl.cpp

index f78c566e0988843b715965ee81232cb7dffd6651..9dd241a12bfa16788e4a6aeb06488c4df9a12a13 100644 (file)
--- a/icu4c/source/common/utf_impl.cpp
+++ b/icu4c/source/common/utf_impl.cpp
@@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
      int32_t i=*pi;
      if(U8_IS_TRAIL(c) && i>start) {
          uint8_t b1=s[--i];
-        if(0xc2<=b1 && b1<0xe0) {
-            *pi=i;
-            return ((b1-0xc0)<<6)|(c&0x3f);
+        if(U8_IS_LEAD(b1)) {
+            if(b1<0xe0) {
+                *pi=i;
+                return ((b1-0xc0)<<6)|(c&0x3f);
+            } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
+                // Truncated 3- or 4-byte sequence.
+                *pi=i;
+                return errorValue(1, strict);
+            }
          } else if(U8_IS_TRAIL(b1) && i>start) {
              // Extract the value bits from the last trail byte.
              c&=0x3f;
              uint8_t b2=s[--i];
-            if(0xe0<=b2 && b2<0xf0) {
-                b2&=0xf;
-                if(strict!=-2) {
-                    if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
-                        *pi=i;
-                        c=(b2<<12)|((b1&0x3f)<<6)|c;
-                        if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
-                            return c;
-                        } else {
-                            // strict: forbid non-characters like U+fffe
-                            return errorValue(2, strict);
+            if(0xe0<=b2 && b2<=0xf4) {
+                if(b2<0xf0) {
+                    b2&=0xf;
+                    if(strict!=-2) {
+                        if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                            *pi=i;
+                            c=(b2<<12)|((b1&0x3f)<<6)|c;
+                            if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+                                return c;
+                            } else {
+                                // strict: forbid non-characters like U+fffe
+                                return errorValue(2, strict);
+                            }
+                        }
+                    } else {
+                        // strict=-2 -> lenient: allow surrogates
+                        b1-=0x80;
+                        if((b2>0 || b1>=0x20)) {
+                            *pi=i;
+                            return (b2<<12)|(b1<<6)|c;
                          }
                      }
-                } else {
-                    // strict=-2 -> lenient: allow surrogates
-                    b1-=0x80;
-                    if((b2>0 || b1>=0x20)) {
-                        *pi=i;
-                        return (b2<<12)|(b1<<6)|c;
-                    }
+                } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+                    // Truncated 4-byte sequence.
+                    *pi=i;
+                    return errorValue(2, strict);
                  }
              } else if(U8_IS_TRAIL(b2) && i>start) {
                  uint8_t b3=s[--i];
@@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
                          }
                      }
                  }
-            } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
-                // Truncated 4-byte sequence.
-                *pi=i;
-                return errorValue(2, strict);
              }
-        } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
-                (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
-            // Truncated 3- or 4-byte sequence.
-            *pi=i;
-            return errorValue(1, strict);
          }
      }
      return errorValue(0, strict);
@@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
      uint8_t c=s[i];
      if(U8_IS_TRAIL(c) && i>start) {
          uint8_t b1=s[--i];
-        if(0xc2<=b1 && b1<0xe0) {
-            return i;
+        if(U8_IS_LEAD(b1)) {
+            if(b1<0xe0 ||
+                    (b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
+                return i;
+            }
          } else if(U8_IS_TRAIL(b1) && i>start) {
              uint8_t b2=s[--i];
-            if(0xe0<=b2 && b2<0xf0) {
-                if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+            if(0xe0<=b2 && b2<=0xf4) {
+                if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
                      return i;
                  }
              } else if(U8_IS_TRAIL(b2) && i>start) {
                  uint8_t b3=s[--i];
-                if(0xf0<=b3 && b3<=0xf4) {
-                    if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
-                        return i;
-                    }
+                if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+                    return i;
                  }
-            } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
-                // Truncated 4-byte sequence.
-                return i;
              }
-        } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
-                (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
-            // Truncated 3- or 4-byte sequence.
-            return i;
          }
      }
      return orig_i;
diff --git a/icu4c/source/config/dist.mk b/icu4c/source/config/dist.mk

index ccc5837a76c657561639ed462cfabbe9b0a36e14..3e6e42e50ddcfe611d7feff44887d0bc78f0c775 100644 (file)
--- a/icu4c/source/config/dist.mk
+++ b/icu4c/source/config/dist.mk
@@ -19,7 +19,7 @@ DISTY_DIR=dist
  DISTY_TMP=dist/tmp
  DISTY_ICU=$(DISTY_TMP)/icu
  DISTY_DATA=$(DISTY_ICU)/source/data
-DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc unit
+DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc/*.txt misc/*.mk unit
  DISTY_RMDIR=$(DISTY_RMV:%=$(DISTY_DATA)/%)
  DISTY_IN=$(DISTY_DATA)/in
  DOCZIP=icu-docs.zip
@@ -49,7 +49,7 @@ $(DISTY_TMP):
  
  $(DISTY_DOC_ZIP):  $(DOCZIP) $(DISTY_FILE_DIR)
         cp $(DOCZIP) $(DISTY_DOC_ZIP)
-       ln -sf $(DISTY_DOC_ZIP) $(DISTY_FILE_DIR)/icu4c-docs.zip
+       ln -sf $(shell basename $(DISTY_DOC_ZIP)) $(DISTY_FILE_DIR)/icu4c-docs.zip
  
  $(DISTY_DAT): 
         echo Missing $@
@@ -74,14 +74,14 @@ $(DISTY_FILE_TGZ) $(DISTY_FILE_ZIP) $(DISTY_DATA_ZIP):  $(DISTY_DAT) $(DISTY_TMP
         $(MKINSTALLDIRS) $(DISTY_IN)
         echo DISTY_DAT=$(DISTY_DAT)
         cp $(DISTY_DAT) $(DISTY_IN)
-       ( cd $(DISTY_TMP)/icu ; python as_is/bomlist.py > as_is/bomlist.txt || rm -f as_is/bomlist.txt )
-       ( cd $(DISTY_TMP) ; zip -rlq $(DISTY_FILE_ZIP) icu )
         $(RMV) $(DISTY_RMDIR)
         ( cd $(DISTY_TMP)/icu ; python as_is/bomlist.py > as_is/bomlist.txt || rm -f as_is/bomlist.txt )
         ( cd $(DISTY_TMP) ; tar cfpz $(DISTY_FILE_TGZ) icu )
-       ln -sf $(DISTY_FILE_ZIP) $(DISTY_FILE_DIR)/icu4c-src.zip
-       ln -sf $(DISTY_FILE_TGZ) $(DISTY_FILE_DIR)/icu4c-src.tgz
-       ln -sf $(DISTY_DATA_ZIP) $(DISTY_FILE_DIR)/icu4c-data.zip
+       ( cd $(DISTY_TMP) ; zip -rlq $(DISTY_FILE_ZIP) icu )
+       $(RMV) $(DISTY_TMP)
+       ln -sf $(shell basename $(DISTY_FILE_ZIP)) $(DISTY_FILE_DIR)/icu4c-src.zip
+       ln -sf $(shell basename $(DISTY_FILE_TGZ)) $(DISTY_FILE_DIR)/icu4c-src.tgz
+       ln -sf $(shell basename $(DISTY_DATA_ZIP)) $(DISTY_FILE_DIR)/icu4c-data.zip
         ls -l $(DISTY_FILE_TGZ) $(DISTY_FILE_ZIP) $(DISTY_DATA_ZIP)
  
  
diff --git a/icu4c/source/i18n/islamcal.cpp b/icu4c/source/i18n/islamcal.cpp

index 4fd0e07d920746d08dd3e2086a52bd4dca818298..b84bedfa0916b170cd2a86514fe740cb8a8a03be 100644 (file)
--- a/icu4c/source/i18n/islamcal.cpp
+++ b/icu4c/source/i18n/islamcal.cpp
@@ -614,7 +614,7 @@ void IslamicCalendar::handleComputeFields(int32_t julianDay, UErrorCode &status)
              days = julianDay - ASTRONOMICAL_EPOC;
          }
          // Use the civil calendar approximation, which is just arithmetic
-        year  = (int)ClockMath::floorDivide( (double)(30 * days + 10646) , 10631.0 );
+        year  = (int32_t)ClockMath::floorDivide(30 * (int64_t)days + 10646, (int64_t)10631);
          month = (int32_t)uprv_ceil((days - 29 - yearStart(year)) / 29.5 );
          month = month<11?month:11;
          startDate = monthStart(year, month);
diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp

index 769fad353fbd5988ff77bcf0262979588ed16e88..d5b368d42300b31410b1deb720151a054e22ac0c 100644 (file)
--- a/icu4c/source/i18n/nfrs.cpp
+++ b/icu4c/source/i18n/nfrs.cpp
@@ -681,7 +681,7 @@ static void dumpUS(FILE* f, const UnicodeString& us) {
  #endif
  
  UBool
-NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
+NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const
  {
      // try matching each rule in the rule set against the text being
      // parsed.  Whichever one matches the most characters is the one
@@ -707,9 +707,12 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
  #endif
      // Try each of the negative rules, fraction rules, infinity rules and NaN rules
      for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
-        if (nonNumericalRules[i]) {
+        if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
+            // Mark this rule as being executed so that we don't try to execute it again.
+            nonNumericalExecutedRuleMask |= 1 << i;
+
              Formattable tempResult;
-            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
+            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
              if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                  result = tempResult;
                  highWaterMark = workingPos;
@@ -748,7 +751,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
                  continue;
              }
              Formattable tempResult;
-            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
+            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
              if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                  result = tempResult;
                  highWaterMark = workingPos;
diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h

index 1e39b289b4dd2fb3b15821c368c42641d2745162..d4797e7ff5547e1ea539de37393f82e3647fc94f 100644 (file)
--- a/icu4c/source/i18n/nfrs.h
+++ b/icu4c/source/i18n/nfrs.h
@@ -55,7 +55,7 @@ public:
      void  format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
      void  format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
  
-    UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const;
+    UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const;
  
      void appendRules(UnicodeString& result) const; // toString
  
diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp

index f24be11bcdc8f8441e431e2e6d7eeb1b29ac708b..f32ed5a747c057d1f6b84c86cba1ac0720d28329 100644 (file)
--- a/icu4c/source/i18n/nfrule.cpp
+++ b/icu4c/source/i18n/nfrule.cpp
@@ -900,6 +900,7 @@ NFRule::doParse(const UnicodeString& text,
                  ParsePosition& parsePosition,
                  UBool isFractionRule,
                  double upperBound,
+                int32_t nonNumericalExecutedRuleMask,
                  Formattable& resVal) const
  {
      // internally we operate on a copy of the string being parsed
@@ -1002,6 +1003,7 @@ NFRule::doParse(const UnicodeString& text,
          temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos);
          double partialResult = matchToDelimiter(workText, start, tempBaseValue,
              temp, pp, sub1,
+            nonNumericalExecutedRuleMask,
              upperBound);
  
          // if we got a successful match (or were trying to match a
@@ -1022,6 +1024,7 @@ NFRule::doParse(const UnicodeString& text,
              temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos);
              partialResult = matchToDelimiter(workText2, 0, partialResult,
                  temp, pp2, sub2,
+                nonNumericalExecutedRuleMask,
                  upperBound);
  
              // if we got a successful match on this second
@@ -1158,6 +1161,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
                           const UnicodeString& delimiter,
                           ParsePosition& pp,
                           const NFSubstitution* sub,
+                         int32_t nonNumericalExecutedRuleMask,
                           double upperBound) const
  {
         UErrorCode status = U_ZERO_ERROR;
@@ -1191,6 +1195,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
  #else
                      formatter->isLenient(),
  #endif
+                    nonNumericalExecutedRuleMask,
                      result);
  
                  // if the substitution could match all the text up to
@@ -1244,6 +1249,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
  #else
              formatter->isLenient(),
  #endif
+            nonNumericalExecutedRuleMask,
              result);
          if (success && (tempPP.getIndex() != 0)) {
              // if there's a successful match (or it's a null
diff --git a/icu4c/source/i18n/nfrule.h b/icu4c/source/i18n/nfrule.h

index 809119ca6c6ce215b24c7d98c242388fadec1fe3..0fabe20237336a19ec13cbda5d0e1aa8cd6a3af1 100644 (file)
--- a/icu4c/source/i18n/nfrule.h
+++ b/icu4c/source/i18n/nfrule.h
@@ -74,6 +74,7 @@ public:
                    ParsePosition& pos, 
                    UBool isFractional, 
                    double upperBound,
+                  int32_t nonNumericalExecutedRuleMask,
                    Formattable& result) const;
  
      UBool shouldRollBack(int64_t number) const;
@@ -94,6 +95,7 @@ private:
      int32_t indexOfAnyRulePrefix() const;
      double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
                              const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, 
+                            int32_t nonNumericalExecutedRuleMask,
                              double upperBound) const;
      void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;
  
diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp

index b5da9821d55beca770cf63d980dc47971a2a925d..ec9e9b873cb9a20947cb19d0249b302eddbbe378 100644 (file)
--- a/icu4c/source/i18n/nfsubs.cpp
+++ b/icu4c/source/i18n/nfsubs.cpp
@@ -155,6 +155,7 @@ public:
          double baseValue,
          double upperBound,
          UBool lenientParse,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
  
      virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const {
@@ -221,6 +222,7 @@ public:
          double baseValue,
          double upperBound,
          UBool lenientParse,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
  
      virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; }
@@ -292,6 +294,7 @@ public:
          double baseValue,
          double upperBound,
          UBool /*lenientParse*/,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
  
      virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; }
@@ -689,6 +692,7 @@ NFSubstitution::doParse(const UnicodeString& text,
                          double baseValue,
                          double upperBound,
                          UBool lenientParse,
+                        int32_t nonNumericalExecutedRuleMask,
                          Formattable& result) const
  {
  #ifdef RBNF_DEBUG
@@ -709,7 +713,7 @@ NFSubstitution::doParse(const UnicodeString& text,
      // on), then also try parsing the text using a default-
      // constructed NumberFormat
      if (ruleSet != NULL) {
-        ruleSet->parse(text, parsePosition, upperBound, result);
+        ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result);
          if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
              UErrorCode status = U_ZERO_ERROR;
              NumberFormat* fmt = NumberFormat::createInstance(status);
@@ -931,18 +935,19 @@ ModulusSubstitution::doParse(const UnicodeString& text,
                               double baseValue,
                               double upperBound,
                               UBool lenientParse,
+                             int32_t nonNumericalExecutedRuleMask,
                               Formattable& result) const
  {
      // if this isn't a >>> substitution, we can just use the
      // inherited parse() routine to do the parsing
      if (ruleToUse == NULL) {
-        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
+        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result);
  
          // but if it IS a >>> substitution, we have to do it here: we
          // use the specific rule's doParse() method, and then we have to
          // do some of the other work of NFRuleSet.parse()
      } else {
-        ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);
+        ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result);
  
          if (parsePosition.getIndex() != 0) {
              UErrorCode status = U_ZERO_ERROR;
@@ -1118,12 +1123,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
                  double baseValue,
                  double /*upperBound*/,
                  UBool lenientParse,
+                int32_t nonNumericalExecutedRuleMask,
                  Formattable& resVal) const
  {
      // if we're not in byDigits mode, we can just use the inherited
      // doParse()
      if (!byDigits) {
-        return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
+        return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal);
  
          // if we ARE in byDigits mode, parse the text one digit at a time
          // using this substitution's owning rule set (we do this by setting
@@ -1141,7 +1147,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
          while (workText.length() > 0 && workPos.getIndex() != 0) {
              workPos.setIndex(0);
              Formattable temp;
-            getRuleSet()->parse(workText, workPos, 10, temp);
+            getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp);
              UErrorCode status = U_ZERO_ERROR;
              digit = temp.getLong(status);
  //            digit = temp.getType() == Formattable::kLong ?
@@ -1249,6 +1255,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
                                 double baseValue,
                                 double upperBound,
                                 UBool /*lenientParse*/,
+                               int32_t nonNumericalExecutedRuleMask,
                                 Formattable& result) const
  {
      // we don't have to do anything special to do the parsing here,
@@ -1267,7 +1274,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
  
          while (workText.length() > 0 && workPos.getIndex() != 0) {
              workPos.setIndex(0);
-            getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all
+            getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all
              if (workPos.getIndex() == 0) {
                  // we failed, either there were no more zeros, or the number was formatted with digits
                  // either way, we're done
@@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
      }
  
      // we've parsed off the zeros, now let's parse the rest from our current position
-    NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result);
+    NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result);
  
      if (withZeros) {
          // any base value will do in this case.  is there a way to
diff --git a/icu4c/source/i18n/nfsubs.h b/icu4c/source/i18n/nfsubs.h

index e8b259137edd1df7ac973dba37d7926c06914134..b8a5fc66198194c6217817e0f91281abf106d70e 100644 (file)
--- a/icu4c/source/i18n/nfsubs.h
+++ b/icu4c/source/i18n/nfsubs.h
@@ -191,6 +191,7 @@ public:
          double baseValue,
          double upperBound, 
          UBool lenientParse,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
      
      /**
diff --git a/icu4c/source/i18n/number_fluent.cpp b/icu4c/source/i18n/number_fluent.cpp

index 3be3401ef3a28d5e76ef7973fbea6ed495dd589e..27113106c5045117efdc9eb1d4a5e3a467df1d94 100644 (file)
--- a/icu4c/source/i18n/number_fluent.cpp
+++ b/icu4c/source/i18n/number_fluent.cpp
@@ -33,12 +33,13 @@ Derived NumberFormatterSettings<Derived>::unit(const icu::MeasureUnit &unit) con
  }
  
  template<typename Derived>
-Derived NumberFormatterSettings<Derived>::adoptUnit(const icu::MeasureUnit *unit) const {
+Derived NumberFormatterSettings<Derived>::adoptUnit(icu::MeasureUnit *unit) const {
      Derived copy(*this);
      // Just copy the unit into the MacroProps by value, and delete it since we have ownership.
      // NOTE: Slicing occurs here. However, CurrencyUnit can be restored from MeasureUnit.
      // TimeUnit may be affected, but TimeUnit is not as relevant to number formatting.
      if (unit != nullptr) {
+      // TODO: On nullptr, reset to default value?
          copy.fMacros.unit = *unit;
          delete unit;
      }
@@ -54,10 +55,11 @@ Derived NumberFormatterSettings<Derived>::perUnit(const icu::MeasureUnit &perUni
  }
  
  template<typename Derived>
-Derived NumberFormatterSettings<Derived>::adoptPerUnit(const icu::MeasureUnit *perUnit) const {
+Derived NumberFormatterSettings<Derived>::adoptPerUnit(icu::MeasureUnit *perUnit) const {
      Derived copy(*this);
      // See comments above about slicing and ownership.
      if (perUnit != nullptr) {
+      // TODO: On nullptr, reset to default value?
          copy.fMacros.perUnit = *perUnit;
          delete perUnit;
      }
@@ -96,7 +98,7 @@ Derived NumberFormatterSettings<Derived>::symbols(const DecimalFormatSymbols &sy
  }
  
  template<typename Derived>
-Derived NumberFormatterSettings<Derived>::adoptSymbols(const NumberingSystem *ns) const {
+Derived NumberFormatterSettings<Derived>::adoptSymbols(NumberingSystem *ns) const {
      Derived copy(*this);
      copy.fMacros.symbols.setTo(ns);
      return copy;
diff --git a/icu4c/source/i18n/number_grouping.cpp b/icu4c/source/i18n/number_grouping.cpp

index 67fd4c943178085f4c2edc282f5dab884a9bb08c..a2b1bbd6b3388e3bd3c4da3271f60c008ba913a5 100644 (file)
--- a/icu4c/source/i18n/number_grouping.cpp
+++ b/icu4c/source/i18n/number_grouping.cpp
@@ -44,7 +44,7 @@ Grouper Grouper::forStrategy(UGroupingStrategy grouping) {
          return {-2, -2, -3};
      case UNUM_GROUPING_ON_ALIGNED:
          return {-4, -4, 1};
-    case UNUM_GROUPING_WESTERN:
+    case UNUM_GROUPING_THOUSANDS:
          return {3, 3, 1};
      default:
          U_ASSERT(FALSE);
diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp

index 66f532e79aaab3ed101b2f7bed763d244b7314b6..1b75e5ee1b73f81fbf2dc817e555f856e36716ca 100644 (file)
--- a/icu4c/source/i18n/rbnf.cpp
+++ b/icu4c/source/i18n/rbnf.cpp
@@ -1371,7 +1371,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
              ParsePosition working_pp(0);
              Formattable working_result;
  
-            rp->parse(workingText, working_pp, kMaxDouble, working_result);
+            rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
              if (working_pp.getIndex() > high_pp.getIndex()) {
                  high_pp = working_pp;
                  high_result = working_result;
diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h

index ac852f27e8eda9cb1124a1a74fee2319c2a8e833..1152f154ed44266f625193d09407239a08d709e0 100644 (file)
--- a/icu4c/source/i18n/unicode/numberformatter.h
+++ b/icu4c/source/i18n/unicode/numberformatter.h
@@ -172,7 +172,7 @@ typedef enum UNumberUnitWidth {
   * <li>MIN2: 1234 and 12,34,567
   * <li>AUTO: 1,234 and 12,34,567
   * <li>ON_ALIGNED: 1,234 and 12,34,567
- * <li>WESTERN: 1,234 and 1,234,567
+ * <li>THOUSANDS: 1,234 and 1,234,567
   * </ul>
   *
   * <p>
@@ -248,7 +248,7 @@ typedef enum UGroupingStrategy {
       *
       * @draft ICU 61
       */
-    UNUM_GROUPING_WESTERN
+    UNUM_GROUPING_THOUSANDS
  
  } UGroupingStrategy;
  
@@ -1515,7 +1515,8 @@ class U_I18N_API NumberFormatterSettings {
       * All units will be properly localized with locale data, and all units are compatible with notation styles,
       * rounding strategies, and other number formatter settings.
       *
-     * Pass this method any instance of {@link MeasureUnit}. For units of measure:
+     * Pass this method any instance of {@link MeasureUnit}. For units of measure (which often involve the
+     * factory methods that return a pointer):
       *
       * <pre>
       * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
@@ -1550,7 +1551,11 @@ class U_I18N_API NumberFormatterSettings {
  
      /**
       * Like unit(), but takes ownership of a pointer.  Convenient for use with the MeasureFormat factory
-     * methods, which return pointers that need ownership.
+     * methods, which return pointers that need ownership.  Example:
+     *
+     * <pre>
+     * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
+     * </pre>
       *
       * @param unit
       *            The unit to render.
@@ -1559,19 +1564,14 @@ class U_I18N_API NumberFormatterSettings {
       * @see MeasureUnit
       * @draft ICU 60
       */
-    Derived adoptUnit(const icu::MeasureUnit *unit) const;
+    Derived adoptUnit(icu::MeasureUnit *unit) const;
  
      /**
       * Sets a unit to be used in the denominator. For example, to format "3 m/s", pass METER to the unit and SECOND to
       * the perUnit.
       *
-     * Pass this method any instance of {@link MeasureUnit}. For example:
-     *
-     * <pre>
-     * NumberFormatter::with()
-     *      .adoptUnit(MeasureUnit::createMeter(status))
-     *      .adoptPerUnit(MeasureUnit::createSecond(status))
-     * </pre>
+     * Pass this method any instance of {@link MeasureUnit}.  Since MeasureUnit factory methods return pointers, the
+     * {@link #adoptPerUnit} version of this method is often more useful.
       *
       * The default is not to display any unit in the denominator.
       *
@@ -1587,7 +1587,13 @@ class U_I18N_API NumberFormatterSettings {
  
      /**
       * Like perUnit(), but takes ownership of a pointer.  Convenient for use with the MeasureFormat factory
-     * methods, which return pointers that need ownership.
+     * methods, which return pointers that need ownership.  Example:
+     *
+     * <pre>
+     * NumberFormatter::with()
+     *      .adoptUnit(MeasureUnit::createMeter(status))
+     *      .adoptPerUnit(MeasureUnit::createSecond(status))
+     * </pre>
       *
       * @param perUnit
       *            The unit to render in the denominator.
@@ -1596,7 +1602,7 @@ class U_I18N_API NumberFormatterSettings {
       * @see MeasureUnit
       * @draft ICU 61
       */
-    Derived adoptPerUnit(const icu::MeasureUnit *perUnit) const;
+    Derived adoptPerUnit(icu::MeasureUnit *perUnit) const;
  
      /**
       * Specifies the rounding strategy to use when formatting numbers.
@@ -1761,7 +1767,7 @@ class U_I18N_API NumberFormatterSettings {
       * @see NumberingSystem
       * @draft ICU 60
       */
-    Derived adoptSymbols(const NumberingSystem *symbols) const;
+    Derived adoptSymbols(NumberingSystem *symbols) const;
  
      /**
       * Sets the width of the unit (measure unit or currency).  Most common values:
diff --git a/icu4c/source/test/cintltst/utf8tst.c b/icu4c/source/test/cintltst/utf8tst.c

index 0bbb5e5413dc76e516338f98d4f977149c80109f..b7062e3b82f5030b27b291d9b51544e8c53c8f3c 100644 (file)
--- a/icu4c/source/test/cintltst/utf8tst.c
+++ b/icu4c/source/test/cintltst/utf8tst.c
@@ -94,6 +94,7 @@ static void TestFwdBack(void);
  static void TestFwdBackUnsafe(void);
  static void TestSetChar(void);
  static void TestSetCharUnsafe(void);
+static void TestTruncateIfIncomplete(void);
  static void TestAppendChar(void);
  static void TestAppend(void);
  static void TestSurrogates(void);
@@ -114,6 +115,7 @@ addUTF8Test(TestNode** root)
      addTest(root, &TestFwdBackUnsafe,           "utf8tst/TestFwdBackUnsafe");
      addTest(root, &TestSetChar,                 "utf8tst/TestSetChar");
      addTest(root, &TestSetCharUnsafe,           "utf8tst/TestSetCharUnsafe");
+    addTest(root, &TestTruncateIfIncomplete,    "utf8tst/TestTruncateIfIncomplete");
      addTest(root, &TestAppendChar,              "utf8tst/TestAppendChar");
      addTest(root, &TestAppend,                  "utf8tst/TestAppend");
      addTest(root, &TestSurrogates,              "utf8tst/TestSurrogates");
@@ -927,6 +929,64 @@ static void TestSetCharUnsafe() {
      }
  }
  
+static void TestTruncateIfIncomplete() {
+    // How U8_TRUNCATE_IF_INCOMPLETE() differs from U8_SET_CP_START():
+    // it never inspects s[length]. A trailing lead byte is therefore
+    // truncated regardless of what the byte at the input index is, even if
+    // that byte could not continue a valid sequence (for example because it
+    // is not a trail byte).
+    // When the last byte is a trail byte, both macros give the same result.
+    //
+    // Each table entry pairs a NUL-terminated byte string with the length
+    // expected after the macro has been applied to the whole string.
+    static const struct {
+        const char *s;
+        int32_t expected;
+    } cases[] = {
+        { "", 0 },
+        { "a", 1 },
+        { "\x80", 1 },
+        { "\xC1", 1 },
+        { "\xC2", 0 },
+        { "\xE0", 0 },
+        { "\xF4", 0 },
+        { "\xF5", 1 },
+        { "\x80\x80", 2 },
+        { "\xC2\xA0", 2 },
+        { "\xE0\x9F", 2 },
+        { "\xE0\xA0", 0 },
+        { "\xED\x9F", 0 },
+        { "\xED\xA0", 2 },
+        { "\xF0\x8F", 2 },
+        { "\xF0\x90", 0 },
+        { "\xF4\x8F", 0 },
+        { "\xF4\x90", 2 },
+        { "\xF5\x80", 2 },
+        { "\x80\x80\x80", 3 },
+        { "\xC2\xA0\x80", 3 },
+        { "\xE0\xA0\x80", 3 },
+        { "\xF0\x8F\x80", 3 },
+        { "\xF0\x90\x80", 0 },
+        { "\xF4\x8F\x80", 0 },
+        { "\xF4\x90\x80", 3 },
+        { "\xF5\x80\x80", 3 },
+        { "\x80\x80\x80\x80", 4 },
+        { "\xC2\xA0\x80\x80", 4 },
+        { "\xE0\xA0\x80\x80", 4 },
+        { "\xF0\x90\x80\x80", 4 },
+        { "\xF5\x80\x80\x80", 4 }
+    };
+    int32_t idx = 0;
+    while (idx < UPRV_LENGTHOF(cases)) {
+        const char *text = cases[idx].s;
+        int32_t fullLength = (int32_t)strlen(text);
+        // The macro updates its third argument in place, so work on a copy
+        // and keep the untouched length for the failure message.
+        int32_t truncated = fullLength;
+        U8_TRUNCATE_IF_INCOMPLETE(text, 0, truncated);
+        if (truncated != cases[idx].expected) {
+            log_err("ERROR: U8_TRUNCATE_IF_INCOMPLETE failed for i=%d, length=%d. Expected:%d Got:%d\n",
+                    (int)idx, (int)fullLength, (int)cases[idx].expected, (int)truncated);
+        }
+        ++idx;
+    }
+}
+
  static void TestAppendChar(){
  #if !U_HIDE_OBSOLETE_UTF_OLD_H
      static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
diff --git a/icu4c/source/test/intltest/calregts.cpp b/icu4c/source/test/intltest/calregts.cpp

index f1eb17bbed327e5216893c24d26c796695d4e04f..24951e5b8aa9f829710883b20a63a7d0a1b5bb99 100644 (file)
--- a/icu4c/source/test/intltest/calregts.cpp
+++ b/icu4c/source/test/intltest/calregts.cpp
@@ -93,6 +93,7 @@ CalendarRegressionTest::runIndexedTest( int32_t index, UBool exec, const char* &
          CASE(50,TestT9452);
          CASE(51,TestT11632);
          CASE(52,TestPersianCalOverflow);
+        CASE(53,TestIslamicCalOverflow);
      default: name = ""; break;
      }
  }
@@ -3009,9 +3010,9 @@ void CalendarRegressionTest::TestPersianCalOverflow(void) {
              month = cal->get(UCAL_MONTH, status);
              dayOfMonth = cal->get(UCAL_DATE, status);
              if ( U_FAILURE(status) ) {
-                errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s\n", localeID, jd, u_errorName(status)); 
+                errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s", localeID, jd, u_errorName(status)); 
              } else if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
-                errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d\n",
+                errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d",
                          localeID, jd, maxMonth, month, maxDayOfMonth, dayOfMonth); 
              }
          }
@@ -3019,4 +3020,35 @@ void CalendarRegressionTest::TestPersianCalOverflow(void) {
      }
  }
  
+/**
+ * Regression test for the islamic-civil calendar: sets a handful of Julian
+ * day numbers around the value where int32_t overflow is noted to begin, then
+ * reads back YEAR/MONTH/DATE. It fails if any get() reports an error or if the
+ * month or day of month exceeds the calendar's own maximum for that field.
+ *
+ * @bug tickets 12661, 13538
+ */
+void CalendarRegressionTest::TestIslamicCalOverflow(void) {
+    const char* localeID = "ar@calendar=islamic-civil";
+    UErrorCode status = U_ZERO_ERROR;
+    Calendar* cal = Calendar::createInstance(Locale(localeID), status);
+    if (U_FAILURE(status)) {
+        dataerrln("FAIL: Calendar::createInstance for localeID %s: %s", localeID, u_errorName(status));
+        return;
+    }
+
+    const int32_t maxMonth = cal->getMaximum(UCAL_MONTH);
+    const int32_t maxDayOfMonth = cal->getMaximum(UCAL_DATE);
+
+    // year 202002, int32_t overflow if jd >= 73530874
+    for (int32_t jd = 73530872; jd <= 73530876; jd++) {
+        // Start every iteration from a clean status and a cleared calendar.
+        status = U_ZERO_ERROR;
+        cal->clear();
+        cal->set(UCAL_JULIAN_DAY, jd);
+
+        const int32_t year = cal->get(UCAL_YEAR, status);
+        const int32_t month = cal->get(UCAL_MONTH, status);
+        const int32_t dayOfMonth = cal->get(UCAL_DATE, status);
+
+        if (U_FAILURE(status)) {
+            errln("FAIL: Calendar->get YEAR/MONTH/DATE for localeID %s, julianDay %d, status %s", localeID, jd, u_errorName(status));
+            continue;
+        }
+        if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
+            errln("FAIL: localeID %s, julianDay %d; got year %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d",
+                    localeID, jd, year, maxMonth, month, maxDayOfMonth, dayOfMonth);
+        }
+    }
+    delete cal;
+}
+
  #endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/test/intltest/calregts.h b/icu4c/source/test/intltest/calregts.h

index 15d550290935ee61b75070b2b41cdef4261adf81..7d36fab0b4546032a81745a92cb816b8c376899d 100644 (file)
--- a/icu4c/source/test/intltest/calregts.h
+++ b/icu4c/source/test/intltest/calregts.h
@@ -79,6 +79,7 @@ public:
      void TestT9452(void);
      void TestT11632(void);
      void TestPersianCalOverflow(void);
+    void TestIslamicCalOverflow(void);
  
      void printdate(GregorianCalendar *cal, const char *string);
      void dowTest(UBool lenient) ;
diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp

index 97700251a381119ed9ed6b7d4f1e61e912728a6b..719df6202aa1890460764f6c7cd43631f18d00bc 100644 (file)
--- a/icu4c/source/test/intltest/itrbnf.cpp
+++ b/icu4c/source/test/intltest/itrbnf.cpp
@@ -75,6 +75,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
          TESTCASE(23, TestVariableDecimalPoint);
          TESTCASE(24, TestLargeNumbers);
          TESTCASE(25, TestCompactDecimalFormatStyle);
+        TESTCASE(26, TestParseFailure);
  #else
          TESTCASE(0, TestRBNFDisabled);
  #endif
@@ -2283,6 +2284,25 @@ void IntlTestRBNF::TestCompactDecimalFormatStyle() {
      doTest(&rbnf, enTestFullData, false);
  }
  
+/**
+ * Checks that text the Japanese spellout formatter cannot interpret is
+ * rejected: each string in the table is parsed and the resulting status must
+ * be U_INVALID_FORMAT_ERROR.
+ */
+void IntlTestRBNF::TestParseFailure() {
+    UErrorCode status = U_ZERO_ERROR;
+    RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getJapanese(), status);
+    if (U_FAILURE(status)) {
+        // Report an unusable formatter (e.g. missing data) as such, rather than
+        // letting the loop below misreport it as "string should be unparseable".
+        dataerrln("FAIL: could not construct formatter - %s", u_errorName(status));
+        return;
+    }
+    // The escapes are written with a doubled backslash so that the char*
+    // literal contains only ASCII and unescape() below produces U+30FB.
+    // A single-backslash \u in a narrow literal is encoded by the compiler in
+    // its execution charset, which need not match ICU's default codepage.
+    static const char* testData[][1] = {
+        { "\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB"
+          "\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB"
+          "\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB" },
+        { NULL }
+    };
+    for (int i = 0; testData[i][0]; ++i) {
+        const char* spelledNumber = testData[i][0]; // spelled-out number
+
+        UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
+        Formattable actualNumber;
+        // Reset for every entry: the expected outcome is a failure status, and
+        // ICU functions conventionally do nothing when handed a status that
+        // already holds a failure, so later entries would pass without being parsed.
+        status = U_ZERO_ERROR;
+        rbnf.parse(spelledNumberString, actualNumber, status);
+        if (status != U_INVALID_FORMAT_ERROR) { // I would have expected U_PARSE_ERROR, but NumberFormat::parse gives U_INVALID_FORMAT_ERROR
+            errln("FAIL: string should be unparseable %s %s", spelledNumber, u_errorName(status));
+        }
+    }
+}
+
  void 
  IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing) 
  {
diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h

index 540b8033342b2e446fdba793f78fc4adf9e944f4..e58d321362cba303a1de9656d1b8a66863c8656c 100644 (file)
--- a/icu4c/source/test/intltest/itrbnf.h
+++ b/icu4c/source/test/intltest/itrbnf.h
@@ -147,6 +147,7 @@ class IntlTestRBNF : public IntlTest {
      void TestRounding();
      void TestLargeNumbers();
      void TestCompactDecimalFormatStyle();
+    void TestParseFailure();
  
  protected:
    virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
diff --git a/icu4c/source/test/intltest/numberformattesttuple.cpp b/icu4c/source/test/intltest/numberformattesttuple.cpp

index 01c2815d5ec9584f77a00297aadeb422bdd1f773..496aaeccde28214b4153ae902b8aeb5df62aec6f 100644 (file)
--- a/icu4c/source/test/intltest/numberformattesttuple.cpp
+++ b/icu4c/source/test/intltest/numberformattesttuple.cpp
@@ -325,6 +325,7 @@ const NumberFormatTestTupleFieldData gFieldData[] = {
      FIELD_INIT(positiveSuffix, &gStrOps),
      FIELD_INIT(negativePrefix, &gStrOps),
      FIELD_INIT(negativeSuffix, &gStrOps),
+    FIELD_INIT(signAlwaysShown, &gIntOps),
      FIELD_INIT(localizedPattern, &gStrOps),
      FIELD_INIT(toPattern, &gStrOps),
      FIELD_INIT(toLocalizedPattern, &gStrOps),
diff --git a/icu4c/source/test/intltest/numberformattesttuple.h b/icu4c/source/test/intltest/numberformattesttuple.h

index f417b3ef995782a0591ed64a86fe59e5c23929d5..685c3d698e1713afbb6e4b4454acf08d6850770b 100644 (file)
--- a/icu4c/source/test/intltest/numberformattesttuple.h
+++ b/icu4c/source/test/intltest/numberformattesttuple.h
@@ -55,6 +55,7 @@ enum ENumberFormatTestTupleField {
      kPositiveSuffix,
      kNegativePrefix,
      kNegativeSuffix,
+    kSignAlwaysShown,
      kLocalizedPattern,
      kToPattern,
      kToLocalizedPattern,
@@ -118,6 +119,7 @@ public:
      UnicodeString positiveSuffix;
      UnicodeString negativePrefix;
      UnicodeString negativeSuffix;
+    int32_t signAlwaysShown;
      UnicodeString localizedPattern;
      UnicodeString toPattern;
      UnicodeString toLocalizedPattern;
@@ -164,6 +166,7 @@ public:
      UBool positiveSuffixFlag;
      UBool negativePrefixFlag;
      UBool negativeSuffixFlag;
+    UBool signAlwaysShownFlag;
      UBool localizedPatternFlag;
      UBool toPatternFlag;
      UBool toLocalizedPatternFlag;
diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp

index 2d625877f304b0725dfa6f3830a4ae95b7f0d82f..783bb00e2f615dab2e444590014b2e4e402af361 100644 (file)
--- a/icu4c/source/test/intltest/numbertest_api.cpp
+++ b/icu4c/source/test/intltest/numbertest_api.cpp
@@ -1097,6 +1097,20 @@ void NumberFormatterApiTest::grouping() {
              u"8.765",
              u"0");
  
+    assertFormatDescendingBig(
+            u"Indic locale with THOUSANDS grouping",
+            NumberFormatter::with().grouping(UNUM_GROUPING_THOUSANDS),
+            Locale("en-IN"),
+            u"87,650,000",
+            u"8,765,000",
+            u"876,500",
+            u"87,650",
+            u"8,765",
+            u"876.5",
+            u"87.65",
+            u"8.765",
+            u"0");
+
      // NOTE: Hungarian is interesting because it has minimumGroupingDigits=4 in locale data
      // If this test breaks due to data changes, find another locale that has minimumGroupingDigits.
      assertFormatDescendingBig(
diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp

index 78b1029cf28663cdfcd83a20657f9be56a996b50..eac5cf828086dfcca8e70b480904079d2fef5252 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.cpp
+++ b/icu4c/source/test/intltest/numfmtst.cpp
@@ -229,6 +229,9 @@ static void adjustDecimalFormat(
      if (tuple.negativeSuffixFlag) {
          fmt.setNegativeSuffix(tuple.negativeSuffix);
      }
+    if (tuple.signAlwaysShownFlag) {
+        // Not currently supported
+    }
      if (tuple.localizedPatternFlag) {
          UErrorCode status = U_ZERO_ERROR;
          fmt.applyLocalizedPattern(tuple.localizedPattern, status);
diff --git a/icu4c/source/test/testdata/numberformattestspecification.txt b/icu4c/source/test/testdata/numberformattestspecification.txt

index 113473a2a57f849223ac41cd03261489f605482e..afddf3156984cd4c9caf7dcecb1e90955131342b 100644 (file)
--- a/icu4c/source/test/testdata/numberformattestspecification.txt
+++ b/icu4c/source/test/testdata/numberformattestspecification.txt
@@ -441,11 +441,10 @@ en_US     1       123,456 123456
  en_US  0       123,456 123
  en_US  1       123.456 123.456
  en_US  0       123.456 123.456
-fr_FR  1       123,456 123.456
-fr_FR  0       123,456 123.456
-// JDK returns 123 here; not sure why.
-fr_FR  1       123.456 123456  K
-fr_FR  0       123.456 123
+it_IT  1       123,456 123.456
+it_IT  0       123,456 123.456
+it_IT  1       123.456 123456
+it_IT  0       123.456 123
  
  test no grouping in pattern with parsing
  set pattern 0
@@ -466,9 +465,8 @@ output      grouping        breaks  grouping2       minGroupingDigits
  1,2345,6789    4
  1,23,45,6789   4       K       2
  1,23,45,6789   4       K       2       2
-// Q only supports minGrouping<=2
  123,456789     6               6       3
-123456789      6       JKQ     6       4
+123456789      6       JK      6       4
  
  test multiplier setters
  set locale en_US
@@ -754,6 +752,7 @@ parse       output  breaks
  +3.52EE4       3.52
  +1,234,567.8901        1234567.8901
  +1,23,4567.8901        1234567.8901
+// Fraction grouping is disabled by default
  +1,23,4567.89,01       1234567.89
  +1,23,456.78.9 123456.78
  +12.34,56      12.34
@@ -831,15 +830,14 @@ parse     output  breaks
  // JDK does allow separators in the wrong place and parses as -5347.25
  (53,47.25)     fail    K
  // strict requires prefix or suffix, except in C
-65,347.25      fail    
+65,347.25      fail
  +3.52E4        35200
  (34.8E-3)      -0.0348
  (3425E-1)      -342.5
  // Strict doesn't allow separators in sci notation.
  (63,425)       -63425
-// JDK and S allow separators in sci notation and parses as -342.5
-// C passes
-(63,425E-1)    fail    CKS
+// J does not allow grouping separators in scientific notation.
+(63,425E-1)    -6342.5 J
  // Both prefix and suffix needed for strict.
  // JDK accepts this and parses as -342.5
  (3425E-1       fail    K
@@ -954,12 +952,12 @@ set negativeSuffix 9N
  begin
  parse  output  breaks
  // S is the only implementation that passes these cases.
-// C consumes the '9' as a digit and assumes number is negative
+// C and P consume the '9' as a digit and assume the number is negative
  // J and JDK bail
-6549K  654     CJK
-// C consumes the '9' as a digit and assumes number is negative
+6549K  654     CJKP
+// C and P consume the '9' as a digit and assumes number is negative
  // J and JDK bail
-6549N  -654    CJK
+6549N  -654    CJKP
  
  test really strange prefix
  set locale en
@@ -974,7 +972,7 @@ test parse pattern with quotes
  set locale en
  set pattern '-'#y
  begin
-parse  output
+parse  output  breaks
  -45y   45
  
  test parse with locale symbols
@@ -1187,17 +1185,17 @@ $53.45  fail    USD     J
  USD 53.45      53.45   USD     J
  53.45USD       53.45   USD     CJ
  USD53.45       53.45   USD
-// S fails these because '(' is an incomplete prefix.
-(7.92) USD     -7.92   USD     CJS
-(7.92) GBP     -7.92   GBP     CJS
-(7.926) USD    -7.926  USD     CJS
-(7.926 USD)    -7.926  USD     CJS
+// P fails these because '(' is an incomplete prefix.
+(7.92) USD     -7.92   USD     CJP
+(7.92) GBP     -7.92   GBP     CJP
+(7.926) USD    -7.926  USD     CJP
+(7.926 USD)    -7.926  USD     CJP
  (USD 7.926)    -7.926  USD     J
-USD (7.926)    -7.926  USD     CJS
-USD (7.92)     -7.92   USD     CJS
-(7.92)USD      -7.92   USD     CJS
-USD(7.92)      -7.92   USD     CJS
-(8) USD        -8      USD     CJS
+USD (7.926)    -7.926  USD     CJP
+USD (7.92)     -7.92   USD     CJP
+(7.92)USD      -7.92   USD     CJP
+USD(7.92)      -7.92   USD     CJP
+(8) USD        -8      USD     CJP
  -8 USD -8      USD     C
  67 USD 67      USD     C
  53.45$ fail    USD
@@ -1223,37 +1221,38 @@ test parse foreign currency symbol
  set pattern \u00a4 0.00;\u00a4 -#
  set locale fa_IR
  begin
-parse  output  outputCurrency
+parse  output  outputCurrency  breaks
  \u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5      1235    IRR
  IRR \u06F1\u06F2\u06F3\u06F5   1235    IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5       1235    IRR
+// P fails here because this currency name is in the Trie only, but it has the same prefix as the non-Trie currency
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5       1235    IRR     P
  IRR 1235       1235    IRR
  \u0631\u06cc\u0627\u0644 1235  1235    IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235   1235    IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235   1235    IRR     P
  
  test parse foreign currency ISO
  set pattern \u00a4\u00a4 0.00;\u00a4\u00a4 -#
  set locale fa_IR
  begin
-parse  output  outputCurrency
+parse  output  outputCurrency  breaks
  \u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5      1235    IRR
  IRR \u06F1\u06F2\u06F3\u06F5   1235    IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5       1235    IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5       1235    IRR     P
  IRR 1235       1235    IRR
  \u0631\u06cc\u0627\u0644 1235  1235    IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235   1235    IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235   1235    IRR     P
  
  test parse foreign currency full
  set pattern \u00a4\u00a4\u00a4 0.00;\u00a4\u00a4\u00a4 -#
  set locale fa_IR
  begin
-parse  output  outputCurrency
+parse  output  outputCurrency  breaks
  \u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5      1235    IRR
  IRR \u06F1\u06F2\u06F3\u06F5   1235    IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5       1235    IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5       1235    IRR     P
  IRR 1235       1235    IRR
  \u0631\u06cc\u0627\u0644 1235  1235    IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235   1235    IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235   1235    IRR     P
  
  test parse currency with foreign symbols symbol english
  set pattern \u00a4 0.00;\u00a4 (#)
@@ -1288,16 +1287,17 @@ Euros 7.82      7.82    EUR
  test parse currency without currency mode
  // Should accept a symbol associated with the currency specified by the API,
  // but should not traverse the full currency data.
+// P always traverses full currency data.
  set locale en_US
  set pattern \u00a4#,##0.00
  begin
  parse  currency        output  breaks
  $52.41 USD     52.41
  USD52.41       USD     52.41   K
-\u20ac52.41    USD     fail
-EUR52.41       USD     fail
-$52.41 EUR     fail
-USD52.41       EUR     fail
+\u20ac52.41    USD     fail    P
+EUR52.41       USD     fail    P
+$52.41 EUR     fail    P
+USD52.41       EUR     fail    P
  \u20ac52.41    EUR     52.41   K
  EUR52.41       EUR     52.41
  
@@ -1307,11 +1307,11 @@ set locale en_US
  set lenient 0
  begin
  parse  output  outputCurrency  breaks
-$53.45 53.45   USD
+$53.45 53.45   USD     P
  53.45 USD      53.45   USD
  USD 53.45      fail    USD
  53.45USD       fail    USD
-USD53.45       53.45   USD
+USD53.45       53.45   USD     P
  (7.92) USD     -7.92   USD
  (7.92) EUR     -7.92   EUR
  (7.926) USD    -7.926  USD
@@ -1329,9 +1329,9 @@ US Dollars 53.45  fail    USD
  53.45 US Dollars       53.45   USD
  US Dollar 53.45        fail    USD
  53.45 US Dollar        53.45   USD
-US Dollars53.45        53.45   USD
+US Dollars53.45        53.45   USD     P
  53.45US Dollars        fail    USD
-US Dollar53.45 53.45   USD
+US Dollar53.45 53.45   USD     P
  US Dollat53.45 fail    USD
  53.45US Dollar fail    USD
  US Dollars (53.45)     fail    USD
@@ -1376,13 +1376,15 @@ test parse minus sign
  set locale en
  set pattern #
  begin
-parse  output  breaks
--123   -123
-- 123  -123    JK
- -123  -123    JK
- - 123 -123    JK
-123-   -123    CJKS
-123 -  -123    CJKS
+pattern        parse   output  breaks
+#      -123    -123
+#      - 123   -123    JK
+#       -123   -123    JK
+#       - 123  -123    JK
+#      123-    123
+#      123 -   123
+#;#-   123-    -123
+#;#-   123 -   -123    JK
  
  test parse case sensitive
  set locale en
@@ -1423,8 +1425,8 @@ NaN       NaN     K
  1E2147483646   1E2147483646
  1E-2147483649  0
  1E-2147483648  0
-// S returns zero here
-1E-2147483647  1E-2147483647   S
+// P returns zero here
+1E-2147483647  1E-2147483647   P
  1E-2147483646  1E-2147483646
  
  test format push limits
@@ -1439,7 +1441,7 @@ maxFractionDigits format  output  breaks
  100    9999999999999.9950000000001     9999999999999.9950000000001     C
  2      9999999999999.9950000000001     10000000000000.00       C
  2      9999999.99499999        9999999.99
-// K doesn't support halfDowm rounding mode?
+// K doesn't support halfDown rounding mode?
  2      9999999.995     9999999.99      K
  2      9999999.99500001        10000000.00
  100    56565656565656565656565656565656565656565656565656565656565656  56565656565656565656565656565656565656565656565656565656565656.00       C
@@ -1453,8 +1455,8 @@ set locale en
  set pattern #,##0
  begin
  parse  output  breaks
-// K and J return null; S and C return 99
- 9 9   9       CJKS
+// K and J return null; S, C, and P return 99
+ 9 9   9       CJKP
  // K returns null
   9 999 9999    K
  
@@ -1497,7 +1499,7 @@ y g h56   -56     JK
  56i j‎k      -56     CJK
  56‎i jk      -56     CJK
  // S and C get 56 (accepts ' ' gs grouping); J and K get null
-5 6    fail    CS
+5 6    fail    CP
  5‎6  5       JK
  
  test parse spaces in grouping
@@ -1507,9 +1509,9 @@ set locale en
  set pattern #,##0
  begin
  parse  output  breaks
-// C, J and S get "12" here
-1 2    1       CJS
-1 23   1       CJS
+// C, J, S, and P get "12" here
+1 2    1       CJP
+1 23   1       CJP
  // K gets 1 here; doesn't pick up the grouping separator
  1 234  1234    K
  
@@ -1543,7 +1545,8 @@ begin
  parse  output  breaks
  55%    0.55
  // J and K get null
-55     0.55    JK
+// P requires the symbol to be present and gets 55
+55     0.55    JKP
  
  test trailing grouping separators in pattern
  // This test is for #13115
@@ -1573,6 +1576,34 @@ begin
  parse  output  breaks
  9223372036854775807%   92233720368547758.07
  
+test sign always shown
+set locale en
+set pattern 0
+set signAlwaysShown 1
+begin
+format output  breaks
+// C, J and K do not support this feature
+42     +42     CJK
+0      +0      CJK
+-42    -42
+
+test parse strict with plus sign
+set locale en
+set pattern 0
+set signAlwaysShown 1
+begin
+lenient        parse   output  breaks
+1      42      42
+1      -42     -42
+1      +42     42      CJK
+1      0       0
+1      +0      0       CJK
+0      42      fail    CJK
+0      -42     -42
+0      +42     42      CJK
+0      0       fail    CJK
+0      +0      0       CJK
+
author	Andy Heninger <andy.heninger@gmail.com>
	Wed, 14 Feb 2018 23:55:39 +0000 (23:55 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Wed, 14 Feb 2018 23:55:39 +0000 (23:55 +0000)
.gitattributes		patch \| blob \| history
.gitignore		patch \| blob \| history
icu4c/LICENSE		patch \| blob \| history
icu4c/packaging/distrelease.ps1		patch \| blob \| history
icu4c/source/common/ucnv_u8.cpp		patch \| blob \| history
icu4c/source/common/unicode/platform.h		patch \| blob \| history
icu4c/source/common/unicode/utf8.h		patch \| blob \| history
icu4c/source/common/utf_impl.cpp		patch \| blob \| history
icu4c/source/config/dist.mk		patch \| blob \| history
icu4c/source/i18n/islamcal.cpp		patch \| blob \| history
icu4c/source/i18n/nfrs.cpp		patch \| blob \| history
icu4c/source/i18n/nfrs.h		patch \| blob \| history
icu4c/source/i18n/nfrule.cpp		patch \| blob \| history
icu4c/source/i18n/nfrule.h		patch \| blob \| history
icu4c/source/i18n/nfsubs.cpp		patch \| blob \| history
icu4c/source/i18n/nfsubs.h		patch \| blob \| history
icu4c/source/i18n/number_fluent.cpp		patch \| blob \| history
icu4c/source/i18n/number_grouping.cpp		patch \| blob \| history
icu4c/source/i18n/rbnf.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/numberformatter.h		patch \| blob \| history
icu4c/source/test/cintltst/utf8tst.c		patch \| blob \| history
icu4c/source/test/intltest/calregts.cpp		patch \| blob \| history
icu4c/source/test/intltest/calregts.h		patch \| blob \| history
icu4c/source/test/intltest/itrbnf.cpp		patch \| blob \| history
icu4c/source/test/intltest/itrbnf.h		patch \| blob \| history
icu4c/source/test/intltest/numberformattesttuple.cpp		patch \| blob \| history
icu4c/source/test/intltest/numberformattesttuple.h		patch \| blob \| history
icu4c/source/test/intltest/numbertest_api.cpp		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.cpp		patch \| blob \| history
icu4c/source/test/testdata/numberformattestspecification.txt		patch \| blob \| history