*.tri2 -text
icu4c/icu4c.css -text
-icu4c/packaging/distrelease.ps1 -text
icu4c/source/aclocal.m4 -text
icu4c/source/config/m4/icu-conditional.m4 -text
icu4c/source/data/curr/pool.res -text
icu4c/source/tools/ctestfw/release
icu4c/source/tools/ctestfw/x64
icu4c/source/tools/ctestfw/x86
+icu4c/source/tools/escapesrc/*.d
+icu4c/source/tools/escapesrc/Makefile
icu4c/source/tools/genbrk/*.d
icu4c/source/tools/genbrk/*.o
icu4c/source/tools/genbrk/*.pdb
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
+
+6. Google double-conversion
+
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# Copyright (C) 2016 and later: Unicode, Inc. and others.\r
-# License & terms of use: http://www.unicode.org/copyright.html\r
-#-------------------------\r
-# Script: icu\packaging\distrelease.ps1\r
-# Author: Steven R. Loomis\r
-# Date: 2017-04-14\r
-#-------------------------\r
-#\r
-# This builds a zipfile containing the *64 bit* Windows binary\r
-#\r
-# Usage: (after building ICU using MSVC) \r
-# (bring up Powershell ISE)\r
-# cd C:\icu\icu4c\\r
-# Set-ExecutionPolicy -Scope Process AllSigned\r
-# .\packaging\distrelease.ps1\r
-#\r
-# Will emit: c:\icu4c\icu\source\dist\icu-windows.zip\r
-#\r
-#\r
-# You will get warnings from the execution policy and the script itself.\r
-# see https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core \r
-# for more about execution policies.\r
-\r
-\r
-$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent\r
-$icuDir = Resolve-Path -Path '$icuDir\..'\r
-\r
-echo $icuDir\r
-\r
-# ok, create some work areas\r
-New-Item -Path "$icuDir\source\dist" -ErrorAction SilentlyContinue -ItemType "directory"\r
-$source = "$icuDir\source\dist\icu"\r
-Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse\r
-New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue\r
-\r
-# copy required stuff\r
-Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\include" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse\r
-Copy-Item -Path "$icuDir\readme.html" -Destination $source -Recurse\r
-\r
-\r
-$destination = "$icuDir\source\dist\icu-windows.zip"\r
-Remove-Item -Path $destination -ErrorAction Continue\r
-Add-Type -assembly "system.io.compression.filesystem"\r
-Echo $source\r
-Echo $destination\r
-[io.compression.zipfile]::CreateFromDirectory($source, $destination)\r
-\r
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+#-------------------------
+# Script: icu\packaging\distrelease.ps1
+# Author: Steven R. Loomis
+# Date: 2017-04-14
+#-------------------------
+#
+# This builds a zipfile containing the *64 bit* Windows binary
+#
+# Usage: (after building ICU using MSVC)
+# (bring up Powershell ISE)
+# cd C:\icu\icu4c\
+# Set-ExecutionPolicy -Scope Process AllSigned
+# .\packaging\distrelease.ps1
+#
+# Will emit: c:\icu\icu4c\source\dist\icu-windows.zip
+#
+#
+# You will get warnings from the execution policy and the script itself.
+# see https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core
+# for more about execution policies.
+
+
+# Locate the ICU4C root directory. This script lives in <root>\packaging,
+# so the root is the parent of the directory that contains the script.
+$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
+# Double quotes are required here: a single-quoted string is not expanded,
+# so Resolve-Path would be handed the literal text '$icuDir\..' instead of
+# the parent of the script directory.
+$icuDir = Resolve-Path -Path "$icuDir\.."
+
+echo $icuDir
+
+# ok, create some work areas
+# (errors are silenced because the directories may already exist)
+New-Item -Path "$icuDir\source\dist" -ErrorAction SilentlyContinue -ItemType "directory"
+$source = "$icuDir\source\dist\icu"
+# Empty the staging directory left over from a previous run, if any.
+Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse
+New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue
+
+# copy required stuff
+Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\include" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse
+Copy-Item -Path "$icuDir\readme.html" -Destination $source -Recurse
+
+
+# Zip the staging directory. Any previous archive is removed first; if there
+# is none, Remove-Item reports an error and the script continues.
+$destination = "$icuDir\source\dist\icu-windows.zip"
+Remove-Item -Path $destination -ErrorAction Continue
+Add-Type -assembly "system.io.compression.filesystem"
+Echo $source
+Echo $destination
+[io.compression.zipfile]::CreateFromDirectory($source, $destination)
+
echo $destination
\ No newline at end of file
// Use a single counter for source and target, counting the minimum of
// the source length and the target capacity.
// Let the standard converter handle edge cases.
- const uint8_t *limit=sourceLimit;
if(count>targetCapacity) {
- limit-=(count-targetCapacity);
count=targetCapacity;
}
- // The conversion loop checks count>0 only once per 1/2/3-byte character.
- // If the buffer ends with a truncated 2- or 3-byte sequence,
+ // The conversion loop checks count>0 only once per character.
+ // If the buffer ends with a truncated sequence,
// then we reduce the count to stop before that,
// and collect the remaining bytes after the conversion loop.
- {
- // Do not go back into the bytes that will be read for finishing a partial
- // sequence from the previous buffer.
- int32_t length=count-toULimit;
- if(length>0) {
- uint8_t b1=*(limit-1);
- if(U8_IS_SINGLE(b1)) {
- // common ASCII character
- } else if(U8_IS_TRAIL(b1) && length>=2) {
- uint8_t b2=*(limit-2);
- if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
- // truncated 3-byte sequence
- count-=2;
- }
- } else if(0xc2<=b1 && b1<0xf0) {
- // truncated 2- or 3-byte sequence
- --count;
- }
- }
- }
+
+ // Do not go back into the bytes that will be read for finishing a partial
+ // sequence from the previous buffer.
+ int32_t length=count-toULimit;
+ U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
+ count=toULimit+length;
}
if(c!=0) {
}
/* copy the legal byte sequence to the target */
- if(count>=toULength) {
+ {
int8_t i;
for(i=0; i<oldToULength; ++i) {
*target++=*source++;
}
count-=toULength;
- } else {
- // A supplementary character that does not fit into the target.
- // Let the standard converter handle this.
- source-=(toULength-oldToULength);
- pToUArgs->source=(char *)source;
- pFromUArgs->target=(char *)target;
- *pErrorCode=U_USING_DEFAULT_WARNING;
- return;
}
}
}
utf8->toULength=toULength;
utf8->mode=toULimit;
break;
- } else if(!U8_IS_TRAIL(b=*source)) {
- /* lead byte in trail byte position */
+ } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
utf8->toULength=toULength;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
break;
*/
#ifdef U_CHARSET_IS_UTF8
/* Use the predefined value. */
-#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
+#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
# define U_CHARSET_IS_UTF8 1
#else
# define U_CHARSET_IS_UTF8 0
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
- uint8_t __t; \
+ uint8_t __t = 0; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
+ *
* "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
+ * @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) { \
} \
}
+/**
+ * If the string ends with a UTF-8 byte sequence that is valid so far
+ * but incomplete, then reduce the length of the string to end before
+ * the lead byte of that incomplete sequence.
+ * For example, if the string ends with E1 80, the length is reduced by 2.
+ *
+ * Useful for processing text split across multiple buffers
+ * (save the incomplete sequence for later)
+ * and for optimizing iteration
+ * (check for string length only once per character).
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_SET_CP_START(), this macro never reads s[length].
+ *
+ * The length argument is evaluated several times and is modified in place,
+ * so it must be a side-effect-free, modifiable lvalue.
+ * The expansion is a single brace-enclosed block, so that the internal
+ * if/else chain cannot capture an "else" that follows the macro call.
+ *
+ * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param length int32_t string length, must be start<=length
+ * @see U8_SET_CP_START
+ * @draft ICU 61
+ */
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) { \
+    if((length)>(start)) { \
+        uint8_t __b1=(s)[(length)-1]; \
+        if(U8_IS_SINGLE(__b1)) { \
+            /* common ASCII character */ \
+        } else if(U8_IS_LEAD(__b1)) { \
+            /* a lead byte at the very end is always incomplete */ \
+            --(length); \
+        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+            uint8_t __b2=(s)[(length)-2]; \
+            if(0xe0<=__b2 && __b2<=0xf4) { \
+                /* 3- or 4-byte lead followed by one valid trail byte */ \
+                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+                    (length)-=2; \
+                } \
+            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+                uint8_t __b3=(s)[(length)-3]; \
+                /* 4-byte lead followed by two trail bytes */ \
+                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+                    (length)-=3; \
+                } \
+            } \
+        } \
+    } \
+}
+
/* definitions with backward iteration -------------------------------------- */
/**
int32_t i=*pi;
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
- if(0xc2<=b1 && b1<0xe0) {
- *pi=i;
- return ((b1-0xc0)<<6)|(c&0x3f);
+ if(U8_IS_LEAD(b1)) {
+ if(b1<0xe0) {
+ *pi=i;
+ return ((b1-0xc0)<<6)|(c&0x3f);
+ } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
+ // Truncated 3- or 4-byte sequence.
+ *pi=i;
+ return errorValue(1, strict);
+ }
} else if(U8_IS_TRAIL(b1) && i>start) {
// Extract the value bits from the last trail byte.
c&=0x3f;
uint8_t b2=s[--i];
- if(0xe0<=b2 && b2<0xf0) {
- b2&=0xf;
- if(strict!=-2) {
- if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
- *pi=i;
- c=(b2<<12)|((b1&0x3f)<<6)|c;
- if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
- return c;
- } else {
- // strict: forbid non-characters like U+fffe
- return errorValue(2, strict);
+ if(0xe0<=b2 && b2<=0xf4) {
+ if(b2<0xf0) {
+ b2&=0xf;
+ if(strict!=-2) {
+ if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ *pi=i;
+ c=(b2<<12)|((b1&0x3f)<<6)|c;
+ if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
+ return c;
+ } else {
+ // strict: forbid non-characters like U+fffe
+ return errorValue(2, strict);
+ }
+ }
+ } else {
+ // strict=-2 -> lenient: allow surrogates
+ b1-=0x80;
+ if((b2>0 || b1>=0x20)) {
+ *pi=i;
+ return (b2<<12)|(b1<<6)|c;
}
}
- } else {
- // strict=-2 -> lenient: allow surrogates
- b1-=0x80;
- if((b2>0 || b1>=0x20)) {
- *pi=i;
- return (b2<<12)|(b1<<6)|c;
- }
+ } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+ // Truncated 4-byte sequence.
+ *pi=i;
+ return errorValue(2, strict);
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
}
}
}
- } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
- // Truncated 4-byte sequence.
- *pi=i;
- return errorValue(2, strict);
}
- } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
- (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
- // Truncated 3- or 4-byte sequence.
- *pi=i;
- return errorValue(1, strict);
}
}
return errorValue(0, strict);
uint8_t c=s[i];
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
- if(0xc2<=b1 && b1<0xe0) {
- return i;
+ if(U8_IS_LEAD(b1)) {
+ if(b1<0xe0 ||
+ (b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
+ return i;
+ }
} else if(U8_IS_TRAIL(b1) && i>start) {
uint8_t b2=s[--i];
- if(0xe0<=b2 && b2<0xf0) {
- if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+ if(0xe0<=b2 && b2<=0xf4) {
+ if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
return i;
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
- if(0xf0<=b3 && b3<=0xf4) {
- if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
- return i;
- }
+ if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+ return i;
}
- } else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
- // Truncated 4-byte sequence.
- return i;
}
- } else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
- (0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
- // Truncated 3- or 4-byte sequence.
- return i;
}
}
return orig_i;
DISTY_TMP=dist/tmp
DISTY_ICU=$(DISTY_TMP)/icu
DISTY_DATA=$(DISTY_ICU)/source/data
-DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc unit
+DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc/*.txt misc/*.mk unit
DISTY_RMDIR=$(DISTY_RMV:%=$(DISTY_DATA)/%)
DISTY_IN=$(DISTY_DATA)/in
DOCZIP=icu-docs.zip
$(DISTY_DOC_ZIP): $(DOCZIP) $(DISTY_FILE_DIR)
cp $(DOCZIP) $(DISTY_DOC_ZIP)
- ln -sf $(DISTY_DOC_ZIP) $(DISTY_FILE_DIR)/icu4c-docs.zip
+ ln -sf $(shell basename $(DISTY_DOC_ZIP)) $(DISTY_FILE_DIR)/icu4c-docs.zip
$(DISTY_DAT):
echo Missing $@
$(MKINSTALLDIRS) $(DISTY_IN)
echo DISTY_DAT=$(DISTY_DAT)
cp $(DISTY_DAT) $(DISTY_IN)
- ( cd $(DISTY_TMP)/icu ; python as_is/bomlist.py > as_is/bomlist.txt || rm -f as_is/bomlist.txt )
- ( cd $(DISTY_TMP) ; zip -rlq $(DISTY_FILE_ZIP) icu )
$(RMV) $(DISTY_RMDIR)
( cd $(DISTY_TMP)/icu ; python as_is/bomlist.py > as_is/bomlist.txt || rm -f as_is/bomlist.txt )
( cd $(DISTY_TMP) ; tar cfpz $(DISTY_FILE_TGZ) icu )
- ln -sf $(DISTY_FILE_ZIP) $(DISTY_FILE_DIR)/icu4c-src.zip
- ln -sf $(DISTY_FILE_TGZ) $(DISTY_FILE_DIR)/icu4c-src.tgz
- ln -sf $(DISTY_DATA_ZIP) $(DISTY_FILE_DIR)/icu4c-data.zip
+ ( cd $(DISTY_TMP) ; zip -rlq $(DISTY_FILE_ZIP) icu )
+ $(RMV) $(DISTY_TMP)
+ ln -sf $(shell basename $(DISTY_FILE_ZIP)) $(DISTY_FILE_DIR)/icu4c-src.zip
+ ln -sf $(shell basename $(DISTY_FILE_TGZ)) $(DISTY_FILE_DIR)/icu4c-src.tgz
+ ln -sf $(shell basename $(DISTY_DATA_ZIP)) $(DISTY_FILE_DIR)/icu4c-data.zip
ls -l $(DISTY_FILE_TGZ) $(DISTY_FILE_ZIP) $(DISTY_DATA_ZIP)
days = julianDay - ASTRONOMICAL_EPOC;
}
// Use the civil calendar approximation, which is just arithmetic
- year = (int)ClockMath::floorDivide( (double)(30 * days + 10646) , 10631.0 );
+ year = (int32_t)ClockMath::floorDivide(30 * (int64_t)days + 10646, (int64_t)10631);
month = (int32_t)uprv_ceil((days - 29 - yearStart(year)) / 29.5 );
month = month<11?month:11;
startDate = monthStart(year, month);
#endif
UBool
-NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
+NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const
{
// try matching each rule in the rule set against the text being
// parsed. Whichever one matches the most characters is the one
#endif
// Try each of the negative rules, fraction rules, infinity rules and NaN rules
for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
- if (nonNumericalRules[i]) {
+ if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
+ // Mark this rule as being executed so that we don't try to execute it again.
+ nonNumericalExecutedRuleMask |= 1 << i;
+
Formattable tempResult;
- UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
+ UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
result = tempResult;
highWaterMark = workingPos;
continue;
}
Formattable tempResult;
- UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
+ UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
result = tempResult;
highWaterMark = workingPos;
void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
- UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const;
+ UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const;
void appendRules(UnicodeString& result) const; // toString
ParsePosition& parsePosition,
UBool isFractionRule,
double upperBound,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& resVal) const
{
// internally we operate on a copy of the string being parsed
temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos);
double partialResult = matchToDelimiter(workText, start, tempBaseValue,
temp, pp, sub1,
+ nonNumericalExecutedRuleMask,
upperBound);
// if we got a successful match (or were trying to match a
temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos);
partialResult = matchToDelimiter(workText2, 0, partialResult,
temp, pp2, sub2,
+ nonNumericalExecutedRuleMask,
upperBound);
// if we got a successful match on this second
const UnicodeString& delimiter,
ParsePosition& pp,
const NFSubstitution* sub,
+ int32_t nonNumericalExecutedRuleMask,
double upperBound) const
{
UErrorCode status = U_ZERO_ERROR;
#else
formatter->isLenient(),
#endif
+ nonNumericalExecutedRuleMask,
result);
// if the substitution could match all the text up to
#else
formatter->isLenient(),
#endif
+ nonNumericalExecutedRuleMask,
result);
if (success && (tempPP.getIndex() != 0)) {
// if there's a successful match (or it's a null
ParsePosition& pos,
UBool isFractional,
double upperBound,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
UBool shouldRollBack(int64_t number) const;
int32_t indexOfAnyRulePrefix() const;
double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub,
+ int32_t nonNumericalExecutedRuleMask,
double upperBound) const;
void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;
double baseValue,
double upperBound,
UBool lenientParse,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const {
double baseValue,
double upperBound,
UBool lenientParse,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; }
double baseValue,
double upperBound,
UBool /*lenientParse*/,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; }
double baseValue,
double upperBound,
UBool lenientParse,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const
{
#ifdef RBNF_DEBUG
// on), then also try parsing the text using a default-
// constructed NumberFormat
if (ruleSet != NULL) {
- ruleSet->parse(text, parsePosition, upperBound, result);
+ ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result);
if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
UErrorCode status = U_ZERO_ERROR;
NumberFormat* fmt = NumberFormat::createInstance(status);
double baseValue,
double upperBound,
UBool lenientParse,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const
{
// if this isn't a >>> substitution, we can just use the
// inherited parse() routine to do the parsing
if (ruleToUse == NULL) {
- return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
+ return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result);
// but if it IS a >>> substitution, we have to do it here: we
// use the specific rule's doParse() method, and then we have to
// do some of the other work of NFRuleSet.parse()
} else {
- ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);
+ ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result);
if (parsePosition.getIndex() != 0) {
UErrorCode status = U_ZERO_ERROR;
double baseValue,
double /*upperBound*/,
UBool lenientParse,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& resVal) const
{
// if we're not in byDigits mode, we can just use the inherited
// doParse()
if (!byDigits) {
- return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
+ return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal);
// if we ARE in byDigits mode, parse the text one digit at a time
// using this substitution's owning rule set (we do this by setting
while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
Formattable temp;
- getRuleSet()->parse(workText, workPos, 10, temp);
+ getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp);
UErrorCode status = U_ZERO_ERROR;
digit = temp.getLong(status);
// digit = temp.getType() == Formattable::kLong ?
double baseValue,
double upperBound,
UBool /*lenientParse*/,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const
{
// we don't have to do anything special to do the parsing here,
while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
- getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all
+ getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all
if (workPos.getIndex() == 0) {
// we failed, either there were no more zeros, or the number was formatted with digits
// either way, we're done
}
// we've parsed off the zeros, now let's parse the rest from our current position
- NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result);
+ NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result);
if (withZeros) {
// any base value will do in this case. is there a way to
double baseValue,
double upperBound,
UBool lenientParse,
+ int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
/**
}
template<typename Derived>
-Derived NumberFormatterSettings<Derived>::adoptUnit(const icu::MeasureUnit *unit) const {
+Derived NumberFormatterSettings<Derived>::adoptUnit(icu::MeasureUnit *unit) const {
Derived copy(*this);
// Just copy the unit into the MacroProps by value, and delete it since we have ownership.
// NOTE: Slicing occurs here. However, CurrencyUnit can be restored from MeasureUnit.
// TimeUnit may be affected, but TimeUnit is not as relevant to number formatting.
if (unit != nullptr) {
+ // TODO: On nullptr, reset to default value?
copy.fMacros.unit = *unit;
delete unit;
}
}
template<typename Derived>
-Derived NumberFormatterSettings<Derived>::adoptPerUnit(const icu::MeasureUnit *perUnit) const {
+Derived NumberFormatterSettings<Derived>::adoptPerUnit(icu::MeasureUnit *perUnit) const {
Derived copy(*this);
// See comments above about slicing and ownership.
if (perUnit != nullptr) {
+ // TODO: On nullptr, reset to default value?
copy.fMacros.perUnit = *perUnit;
delete perUnit;
}
}
template<typename Derived>
-Derived NumberFormatterSettings<Derived>::adoptSymbols(const NumberingSystem *ns) const {
+Derived NumberFormatterSettings<Derived>::adoptSymbols(NumberingSystem *ns) const {
Derived copy(*this);
copy.fMacros.symbols.setTo(ns);
return copy;
return {-2, -2, -3};
case UNUM_GROUPING_ON_ALIGNED:
return {-4, -4, 1};
- case UNUM_GROUPING_WESTERN:
+ case UNUM_GROUPING_THOUSANDS:
return {3, 3, 1};
default:
U_ASSERT(FALSE);
ParsePosition working_pp(0);
Formattable working_result;
- rp->parse(workingText, working_pp, kMaxDouble, working_result);
+ rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
if (working_pp.getIndex() > high_pp.getIndex()) {
high_pp = working_pp;
high_result = working_result;
* <li>MIN2: 1234 and 12,34,567
* <li>AUTO: 1,234 and 12,34,567
* <li>ON_ALIGNED: 1,234 and 12,34,567
- * <li>WESTERN: 1,234 and 1,234,567
+ * <li>THOUSANDS: 1,234 and 1,234,567
* </ul>
*
* <p>
*
* @draft ICU 61
*/
- UNUM_GROUPING_WESTERN
+ UNUM_GROUPING_THOUSANDS
} UGroupingStrategy;
* All units will be properly localized with locale data, and all units are compatible with notation styles,
* rounding strategies, and other number formatter settings.
*
- * Pass this method any instance of {@link MeasureUnit}. For units of measure:
+ * Pass this method any instance of {@link MeasureUnit}. For units of measure (which often involve the
+ * factory methods that return a pointer):
*
* <pre>
* NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
/**
* Like unit(), but takes ownership of a pointer. Convenient for use with the MeasureUnit factory
- * methods, which return pointers that need ownership.
+ * methods, which return pointers that need ownership. Example:
+ *
+ * <pre>
+ * NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
+ * </pre>
*
* @param unit
* The unit to render.
* @see MeasureUnit
* @draft ICU 60
*/
- Derived adoptUnit(const icu::MeasureUnit *unit) const;
+ Derived adoptUnit(icu::MeasureUnit *unit) const;
/**
* Sets a unit to be used in the denominator. For example, to format "3 m/s", pass METER to the unit and SECOND to
* the perUnit.
*
- * Pass this method any instance of {@link MeasureUnit}. For example:
- *
- * <pre>
- * NumberFormatter::with()
- * .adoptUnit(MeasureUnit::createMeter(status))
- * .adoptPerUnit(MeasureUnit::createSecond(status))
- * </pre>
+ * Pass this method any instance of {@link MeasureUnit}. Since MeasureUnit factory methods return pointers, the
+ * {@link #adoptPerUnit} version of this method is often more useful.
*
* The default is not to display any unit in the denominator.
*
/**
* Like perUnit(), but takes ownership of a pointer. Convenient for use with the MeasureUnit factory
- * methods, which return pointers that need ownership.
+ * methods, which return pointers that need ownership. Example:
+ *
+ * <pre>
+ * NumberFormatter::with()
+ * .adoptUnit(MeasureUnit::createMeter(status))
+ * .adoptPerUnit(MeasureUnit::createSecond(status))
+ * </pre>
*
* @param perUnit
* The unit to render in the denominator.
* @see MeasureUnit
* @draft ICU 61
*/
- Derived adoptPerUnit(const icu::MeasureUnit *perUnit) const;
+ Derived adoptPerUnit(icu::MeasureUnit *perUnit) const;
/**
* Specifies the rounding strategy to use when formatting numbers.
* @see NumberingSystem
* @draft ICU 60
*/
- Derived adoptSymbols(const NumberingSystem *symbols) const;
+ Derived adoptSymbols(NumberingSystem *symbols) const;
/**
* Sets the width of the unit (measure unit or currency). Most common values:
static void TestFwdBackUnsafe(void);
static void TestSetChar(void);
static void TestSetCharUnsafe(void);
+static void TestTruncateIfIncomplete(void);
static void TestAppendChar(void);
static void TestAppend(void);
static void TestSurrogates(void);
addTest(root, &TestFwdBackUnsafe, "utf8tst/TestFwdBackUnsafe");
addTest(root, &TestSetChar, "utf8tst/TestSetChar");
addTest(root, &TestSetCharUnsafe, "utf8tst/TestSetCharUnsafe");
+ addTest(root, &TestTruncateIfIncomplete, "utf8tst/TestTruncateIfIncomplete");
addTest(root, &TestAppendChar, "utf8tst/TestAppendChar");
addTest(root, &TestAppend, "utf8tst/TestAppend");
addTest(root, &TestSurrogates, "utf8tst/TestSurrogates");
}
}
+static void TestTruncateIfIncomplete() {
+    /*
+     * Table-driven check of U8_TRUNCATE_IF_INCOMPLETE().
+     * Each row holds a NUL-terminated byte string and the length that the
+     * macro is expected to leave behind when called with start=0.
+     *
+     * How this differs from U8_SET_CP_START():
+     * U8_TRUNCATE_IF_INCOMPLETE() does not look at s[length], so a final
+     * lead byte is always cut off, whatever byte follows it in memory.
+     * When the final byte is a trail byte, both macros behave the same.
+     */
+    static const struct {
+        const char *s;
+        int32_t expected;
+    } cases[] = {
+        /* zero and one byte */
+        { "", 0 },
+        { "a", 1 },
+        { "\x80", 1 },
+        { "\xC1", 1 },
+        { "\xC2", 0 },
+        { "\xE0", 0 },
+        { "\xF4", 0 },
+        { "\xF5", 1 },
+        /* two bytes */
+        { "\x80\x80", 2 },
+        { "\xC2\xA0", 2 },
+        { "\xE0\x9F", 2 },
+        { "\xE0\xA0", 0 },
+        { "\xED\x9F", 0 },
+        { "\xED\xA0", 2 },
+        { "\xF0\x8F", 2 },
+        { "\xF0\x90", 0 },
+        { "\xF4\x8F", 0 },
+        { "\xF4\x90", 2 },
+        { "\xF5\x80", 2 },
+        /* three bytes */
+        { "\x80\x80\x80", 3 },
+        { "\xC2\xA0\x80", 3 },
+        { "\xE0\xA0\x80", 3 },
+        { "\xF0\x8F\x80", 3 },
+        { "\xF0\x90\x80", 0 },
+        { "\xF4\x8F\x80", 0 },
+        { "\xF4\x90\x80", 3 },
+        { "\xF5\x80\x80", 3 },
+        /* four bytes */
+        { "\x80\x80\x80\x80", 4 },
+        { "\xC2\xA0\x80\x80", 4 },
+        { "\xE0\xA0\x80\x80", 4 },
+        { "\xF0\x90\x80\x80", 4 },
+        { "\xF5\x80\x80\x80", 4 }
+    };
+    int32_t idx = 0;
+    while (idx < UPRV_LENGTHOF(cases)) {
+        const char *text = cases[idx].s;
+        int32_t fullLength = (int32_t)strlen(text);
+        /* The macro modifies its length argument in place. */
+        int32_t truncated = fullLength;
+        U8_TRUNCATE_IF_INCOMPLETE(text, 0, truncated);
+        if (truncated != cases[idx].expected) {
+            log_err("ERROR: U8_TRUNCATE_IF_INCOMPLETE failed for i=%d, length=%d. Expected:%d Got:%d\n",
+                    (int)idx, (int)fullLength, (int)cases[idx].expected, (int)truncated);
+        }
+        ++idx;
+    }
+}
+
static void TestAppendChar(){
#if !U_HIDE_OBSOLETE_UTF_OLD_H
static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};
CASE(50,TestT9452);
CASE(51,TestT11632);
CASE(52,TestPersianCalOverflow);
+ CASE(53,TestIslamicCalOverflow);
default: name = ""; break;
}
}
month = cal->get(UCAL_MONTH, status);
dayOfMonth = cal->get(UCAL_DATE, status);
if ( U_FAILURE(status) ) {
- errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s\n", localeID, jd, u_errorName(status));
+ errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s", localeID, jd, u_errorName(status));
} else if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
- errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d\n",
+ errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d",
localeID, jd, maxMonth, month, maxDayOfMonth, dayOfMonth);
}
}
}
}
+/**
+ * @bug tickets 12661, 13538
+ *
+ * Sets UCAL_JULIAN_DAY on an islamic-civil calendar to a short run of very
+ * large values and checks that the resulting month and day-of-month do not
+ * exceed the calendar's reported maxima.
+ */
+void CalendarRegressionTest::TestIslamicCalOverflow(void) {
+    const char* localeID = "ar@calendar=islamic-civil";
+    UErrorCode status = U_ZERO_ERROR;
+    Calendar* cal = Calendar::createInstance(Locale(localeID), status);
+    if (U_FAILURE(status)) {
+        // Reported as a data error; nothing further can be tested.
+        dataerrln("FAIL: Calendar::createInstance for localeID %s: %s", localeID, u_errorName(status));
+        return;
+    }
+
+    const int32_t maxMonth = cal->getMaximum(UCAL_MONTH);
+    const int32_t maxDayOfMonth = cal->getMaximum(UCAL_DATE);
+    // year 202002, int32_t overflow if jd >= 73530874
+    for (int32_t jd = 73530872; jd <= 73530876; jd++) {
+        status = U_ZERO_ERROR;
+        cal->clear();
+        cal->set(UCAL_JULIAN_DAY, jd);
+        const int32_t year = cal->get(UCAL_YEAR, status);
+        const int32_t month = cal->get(UCAL_MONTH, status);
+        const int32_t dayOfMonth = cal->get(UCAL_DATE, status);
+        if (U_FAILURE(status)) {
+            errln("FAIL: Calendar->get YEAR/MONTH/DATE for localeID %s, julianDay %d, status %s", localeID, jd, u_errorName(status));
+            continue;
+        }
+        if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
+            errln("FAIL: localeID %s, julianDay %d; got year %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d",
+                    localeID, jd, year, maxMonth, month, maxDayOfMonth, dayOfMonth);
+        }
+    }
+    delete cal;
+}
+
#endif /* #if !UCONFIG_NO_FORMATTING */
void TestT9452(void);
void TestT11632(void);
void TestPersianCalOverflow(void);
+ void TestIslamicCalOverflow(void);
void printdate(GregorianCalendar *cal, const char *string);
void dowTest(UBool lenient) ;
TESTCASE(23, TestVariableDecimalPoint);
TESTCASE(24, TestLargeNumbers);
TESTCASE(25, TestCompactDecimalFormatStyle);
+ TESTCASE(26, TestParseFailure);
#else
TESTCASE(0, TestRBNFDisabled);
#endif
doTest(&rbnf, enTestFullData, false);
}
+/**
+ * Checks that the Japanese spellout formatter rejects input made only of
+ * U+30FB characters: parse() must set U_INVALID_FORMAT_ERROR.
+ */
+void IntlTestRBNF::TestParseFailure() {
+    UErrorCode status = U_ZERO_ERROR;
+    RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getJapanese(), status);
+    if (U_FAILURE(status)) {
+        // Without a usable formatter, parse() would just echo this failure and
+        // the test would report a misleading "should be unparseable" error.
+        dataerrln("FAIL: could not construct Japanese spellout RuleBasedNumberFormat - %s", u_errorName(status));
+        return;
+    }
+    // The data is kept as ASCII escape sequences (note the doubled backslash)
+    // so that the bytes do not depend on the compiler's execution character
+    // set; unescape() below turns each \uXXXX into the real code point.
+    static const char* testData[][1] = {
+        { "\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB"
+          "\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB"
+          "\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB\\u30FB" },
+        { NULL }
+    };
+    for (int i = 0; testData[i][0]; ++i) {
+        const char* spelledNumber = testData[i][0]; // spelled-out number
+
+        UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
+        Formattable actualNumber;
+        // Start every entry from a clean status so that the expected failure
+        // of one entry cannot mask the result of the next one.
+        status = U_ZERO_ERROR;
+        rbnf.parse(spelledNumberString, actualNumber, status);
+        if (status != U_INVALID_FORMAT_ERROR) { // I would have expected U_PARSE_ERROR, but NumberFormat::parse gives U_INVALID_FORMAT_ERROR
+            errln("FAIL: string should be unparseable %s %s", spelledNumber, u_errorName(status));
+        }
+    }
+}
+
void
IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing)
{
void TestRounding();
void TestLargeNumbers();
void TestCompactDecimalFormatStyle();
+ void TestParseFailure();
protected:
virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
FIELD_INIT(positiveSuffix, &gStrOps),
FIELD_INIT(negativePrefix, &gStrOps),
FIELD_INIT(negativeSuffix, &gStrOps),
+ FIELD_INIT(signAlwaysShown, &gIntOps),
FIELD_INIT(localizedPattern, &gStrOps),
FIELD_INIT(toPattern, &gStrOps),
FIELD_INIT(toLocalizedPattern, &gStrOps),
kPositiveSuffix,
kNegativePrefix,
kNegativeSuffix,
+ kSignAlwaysShown,
kLocalizedPattern,
kToPattern,
kToLocalizedPattern,
UnicodeString positiveSuffix;
UnicodeString negativePrefix;
UnicodeString negativeSuffix;
+ int32_t signAlwaysShown;
UnicodeString localizedPattern;
UnicodeString toPattern;
UnicodeString toLocalizedPattern;
UBool positiveSuffixFlag;
UBool negativePrefixFlag;
UBool negativeSuffixFlag;
+ UBool signAlwaysShownFlag;
UBool localizedPatternFlag;
UBool toPatternFlag;
UBool toLocalizedPatternFlag;
u"8.765",
u"0");
+ assertFormatDescendingBig(
+ u"Indic locale with THOUSANDS grouping",
+ NumberFormatter::with().grouping(UNUM_GROUPING_THOUSANDS),
+ Locale("en-IN"),
+ u"87,650,000",
+ u"8,765,000",
+ u"876,500",
+ u"87,650",
+ u"8,765",
+ u"876.5",
+ u"87.65",
+ u"8.765",
+ u"0");
+
// NOTE: Hungarian is interesting because it has minimumGroupingDigits=4 in locale data
// If this test breaks due to data changes, find another locale that has minimumGroupingDigits.
assertFormatDescendingBig(
if (tuple.negativeSuffixFlag) {
fmt.setNegativeSuffix(tuple.negativeSuffix);
}
+ if (tuple.signAlwaysShownFlag) {
+ // Not currently supported
+ }
if (tuple.localizedPatternFlag) {
UErrorCode status = U_ZERO_ERROR;
fmt.applyLocalizedPattern(tuple.localizedPattern, status);
en_US 0 123,456 123
en_US 1 123.456 123.456
en_US 0 123.456 123.456
-fr_FR 1 123,456 123.456
-fr_FR 0 123,456 123.456
-// JDK returns 123 here; not sure why.
-fr_FR 1 123.456 123456 K
-fr_FR 0 123.456 123
+it_IT 1 123,456 123.456
+it_IT 0 123,456 123.456
+it_IT 1 123.456 123456
+it_IT 0 123.456 123
test no grouping in pattern with parsing
set pattern 0
1,2345,6789 4
1,23,45,6789 4 K 2
1,23,45,6789 4 K 2 2
-// Q only supports minGrouping<=2
123,456789 6 6 3
-123456789 6 JKQ 6 4
+123456789 6 JK 6 4
test multiplier setters
set locale en_US
+3.52EE4 3.52
+1,234,567.8901 1234567.8901
+1,23,4567.8901 1234567.8901
+// Fraction grouping is disabled by default
+1,23,4567.89,01 1234567.89
+1,23,456.78.9 123456.78
+12.34,56 12.34
// JDK does allow separators in the wrong place and parses as -5347.25
(53,47.25) fail K
// strict requires prefix or suffix, except in C
-65,347.25 fail
+65,347.25 fail
+3.52E4 35200
(34.8E-3) -0.0348
(3425E-1) -342.5
// Strict doesn't allow separators in sci notation.
(63,425) -63425
-// JDK and S allow separators in sci notation and parses as -342.5
-// C passes
-(63,425E-1) fail CKS
+// J does not allow grouping separators in scientific notation.
+(63,425E-1) -6342.5 J
// Both prefix and suffix needed for strict.
// JDK accepts this and parses as -342.5
(3425E-1 fail K
begin
parse output breaks
// S is the only implementation that passes these cases.
-// C consumes the '9' as a digit and assumes number is negative
+// C and P consume the '9' as a digit and assume the number is negative
// J and JDK bail
-6549K 654 CJK
-// C consumes the '9' as a digit and assumes number is negative
+6549K 654 CJKP
+// C and P consume the '9' as a digit and assume the number is negative
// J and JDK bail
-6549N -654 CJK
+6549N -654 CJKP
test really strange prefix
set locale en
set locale en
set pattern '-'#y
begin
-parse output
+parse output breaks
-45y 45
test parse with locale symbols
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
-// S fails these because '(' is an incomplete prefix.
-(7.92) USD -7.92 USD CJS
-(7.92) GBP -7.92 GBP CJS
-(7.926) USD -7.926 USD CJS
-(7.926 USD) -7.926 USD CJS
+// P fails these because '(' is an incomplete prefix.
+(7.92) USD -7.92 USD CJP
+(7.92) GBP -7.92 GBP CJP
+(7.926) USD -7.926 USD CJP
+(7.926 USD) -7.926 USD CJP
(USD 7.926) -7.926 USD J
-USD (7.926) -7.926 USD CJS
-USD (7.92) -7.92 USD CJS
-(7.92)USD -7.92 USD CJS
-USD(7.92) -7.92 USD CJS
-(8) USD -8 USD CJS
+USD (7.926) -7.926 USD CJP
+USD (7.92) -7.92 USD CJP
+(7.92)USD -7.92 USD CJP
+USD(7.92) -7.92 USD CJP
+(8) USD -8 USD CJP
-8 USD -8 USD C
67 USD 67 USD C
53.45$ fail USD
set pattern \u00a4 0.00;\u00a4 -#
set locale fa_IR
begin
-parse output outputCurrency
+parse output outputCurrency breaks
\u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5 1235 IRR
IRR \u06F1\u06F2\u06F3\u06F5 1235 IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR
+// P fails here because this currency name is in the Trie only, but it has the same prefix as the non-Trie currency
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR P
IRR 1235 1235 IRR
\u0631\u06cc\u0627\u0644 1235 1235 IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR P
test parse foreign currency ISO
set pattern \u00a4\u00a4 0.00;\u00a4\u00a4 -#
set locale fa_IR
begin
-parse output outputCurrency
+parse output outputCurrency breaks
\u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5 1235 IRR
IRR \u06F1\u06F2\u06F3\u06F5 1235 IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR P
IRR 1235 1235 IRR
\u0631\u06cc\u0627\u0644 1235 1235 IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR P
test parse foreign currency full
set pattern \u00a4\u00a4\u00a4 0.00;\u00a4\u00a4\u00a4 -#
set locale fa_IR
begin
-parse output outputCurrency
+parse output outputCurrency breaks
\u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5 1235 IRR
IRR \u06F1\u06F2\u06F3\u06F5 1235 IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR P
IRR 1235 1235 IRR
\u0631\u06cc\u0627\u0644 1235 1235 IRR
-\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR
+\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR P
test parse currency with foreign symbols symbol english
set pattern \u00a4 0.00;\u00a4 (#)
test parse currency without currency mode
// Should accept a symbol associated with the currency specified by the API,
// but should not traverse the full currency data.
+// P always traverses full currency data.
set locale en_US
set pattern \u00a4#,##0.00
begin
parse currency output breaks
$52.41 USD 52.41
USD52.41 USD 52.41 K
-\u20ac52.41 USD fail
-EUR52.41 USD fail
-$52.41 EUR fail
-USD52.41 EUR fail
+\u20ac52.41 USD fail P
+EUR52.41 USD fail P
+$52.41 EUR fail P
+USD52.41 EUR fail P
\u20ac52.41 EUR 52.41 K
EUR52.41 EUR 52.41
set lenient 0
begin
parse output outputCurrency breaks
-$53.45 53.45 USD
+$53.45 53.45 USD P
53.45 USD 53.45 USD
USD 53.45 fail USD
53.45USD fail USD
-USD53.45 53.45 USD
+USD53.45 53.45 USD P
(7.92) USD -7.92 USD
(7.92) EUR -7.92 EUR
(7.926) USD -7.926 USD
53.45 US Dollars 53.45 USD
US Dollar 53.45 fail USD
53.45 US Dollar 53.45 USD
-US Dollars53.45 53.45 USD
+US Dollars53.45 53.45 USD P
53.45US Dollars fail USD
-US Dollar53.45 53.45 USD
+US Dollar53.45 53.45 USD P
US Dollat53.45 fail USD
53.45US Dollar fail USD
US Dollars (53.45) fail USD
set locale en
set pattern #
begin
-parse output breaks
--123 -123
-- 123 -123 JK
- -123 -123 JK
- - 123 -123 JK
-123- -123 CJKS
-123 - -123 CJKS
+pattern parse output breaks
+# -123 -123
+# - 123 -123 JK
+# -123 -123 JK
+# - 123 -123 JK
+# 123- 123
+# 123 - 123
+#;#- 123- -123
+#;#- 123 - -123 JK
test parse case sensitive
set locale en
1E2147483646 1E2147483646
1E-2147483649 0
1E-2147483648 0
-// S returns zero here
-1E-2147483647 1E-2147483647 S
+// P returns zero here
+1E-2147483647 1E-2147483647 P
1E-2147483646 1E-2147483646
test format push limits
100 9999999999999.9950000000001 9999999999999.9950000000001 C
2 9999999999999.9950000000001 10000000000000.00 C
2 9999999.99499999 9999999.99
-// K doesn't support halfDowm rounding mode?
+// K doesn't support halfDown rounding mode?
2 9999999.995 9999999.99 K
2 9999999.99500001 10000000.00
100 56565656565656565656565656565656565656565656565656565656565656 56565656565656565656565656565656565656565656565656565656565656.00 C
set pattern #,##0
begin
parse output breaks
-// K and J return null; S and C return 99
- 9 9 9 CJKS
+// K and J return null; S, C, and P return 99
+ 9 9 9 CJKP
// K returns null
9 999 9999 K
56i jk -56 CJK
56i jk -56 CJK
// S and C get 56 (accepts ' ' gs grouping); J and K get null
-5 6 fail CS
+5 6 fail CP
56 5 JK
test parse spaces in grouping
set pattern #,##0
begin
parse output breaks
-// C, J and S get "12" here
-1 2 1 CJS
-1 23 1 CJS
+// C, J, S, and P get "12" here
+1 2 1 CJP
+1 23 1 CJP
// K gets 1 here; doesn't pick up the grouping separator
1 234 1234 K
parse output breaks
55% 0.55
// J and K get null
-55 0.55 JK
+// P requires the symbol to be present and gets 55
+55 0.55 JKP
test trailing grouping separators in pattern
// This test is for #13115
parse output breaks
9223372036854775807% 92233720368547758.07
+test sign always shown
+set locale en
+set pattern 0
+set signAlwaysShown 1
+begin
+format output breaks
+// C, J and K do not support this feature
+42 +42 CJK
+0 +0 CJK
+-42 -42
+
+test parse strict with plus sign
+set locale en
+set pattern 0
+set signAlwaysShown 1
+begin
+lenient parse output breaks
+1 42 42
+1 -42 -42
+1 +42 42 CJK
+1 0 0
+1 +0 0 CJK
+0 42 fail CJK
+0 -42 -42
+0 +42 42 CJK
+0 0 fail CJK
+0 +0 0 CJK
+