From 3a6c706388ef6666c6965b915fddb43473bb8f0f Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Sat, 29 Apr 2017 01:33:06 +0000 Subject: [PATCH] ICU-13177 Moving "codePointZero" optimization from PositiveDecimalFormat.java to DecimalFormatSymbols.java. X-SVN-Rev: 40091 --- .../formatters/PositiveDecimalFormat.java | 20 +---- .../ibm/icu/text/DecimalFormatSymbols.java | 80 +++++++++++++++++-- .../format/IntlTestDecimalFormatSymbols.java | 15 ++++ 3 files changed, 92 insertions(+), 23 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/formatters/PositiveDecimalFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/formatters/PositiveDecimalFormat.java index b4a33dcc8ac..fdb26062f4c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/formatters/PositiveDecimalFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/formatters/PositiveDecimalFormat.java @@ -123,26 +123,14 @@ public class PositiveDecimalFormat implements Format.TargetFormat { decimalSeparator = symbols.getDecimalSeparatorString(); } - // Check to see if we can use code points instead of strings (~15% format performance boost) - int _codePointZero = -1; - String[] _digitStrings = symbols.getDigitStringsLocal(); - for (int i = 0; i < _digitStrings.length; i++) { - int cp = Character.codePointAt(_digitStrings[i], 0); - int cc = Character.charCount(cp); - if (cc != _digitStrings[i].length()) { - _codePointZero = -1; - break; - } else if (i == 0) { - _codePointZero = cp; - } else if (cp != _codePointZero + i) { - _codePointZero = -1; - break; - } - } + // Check to see if we can use code points instead of strings + int _codePointZero = symbols.getCodePointZero(); if (_codePointZero != -1) { + // Fast Path (~9% faster than slow path when formatting long strings) digitStrings = null; codePointZero = _codePointZero; } else { + // Slow Path digitStrings = symbols.getDigitStrings(); // makes a copy codePointZero = -1; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java index 07e812bcd2a..e2354c1d213 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java @@ -215,6 +215,9 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { digits[i] = d; } } + + // Update codePointZero: it is simply zeroDigit. + codePointZero = zeroDigit; } /** @@ -231,6 +234,11 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { /** * Returns the array of strings used as digits, in order from 0 through 9 * Package private method - doesn't create a defensively copy. + * + *

WARNING: Mutating the returned array will cause undefined behavior. + * If you need to change the value of the array, use {@link #getDigitStrings} and {@link + * #setDigitStrings} instead. + * * @return the array of digit strings * @internal * @deprecated This API is ICU internal only. @@ -240,6 +248,19 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { return digitStrings; } + /** + * If the digit strings array corresponds to a sequence of increasing code points, this method + * returns the code point corresponding to the first entry in the digit strings array. If the + * digit strings array is not a sequence of increasing code points, returns -1. + * + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + public int getCodePointZero() { + return codePointZero; + } + /** * {@icu} Sets the array of strings used as digits, in order from 0 through 9 *

@@ -266,22 +287,48 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { } // Scan input array and create char[] representation if possible + // Also update codePointZero if possible String[] tmpDigitStrings = new String[10]; char[] tmpDigits = new char[10]; + int tmpCodePointZero = -1; for (int i = 0; i < 10; i++) { - if (digitStrings[i] == null) { + String digitStr = digitStrings[i]; + if (digitStr == null) { throw new IllegalArgumentException("The input digit string array contains a null element"); } - tmpDigitStrings[i] = digitStrings[i]; - if (tmpDigits != null && digitStrings[i].length() == 1) { - tmpDigits[i] = digitStrings[i].charAt(0); + tmpDigitStrings[i] = digitStr; + int cp, cc; + if (digitStr.length() == 0) { + cp = -1; + cc = 0; } else { - // contains digit string with multiple UTF-16 code units + cp = Character.codePointAt(digitStrings[i], 0); + cc = Character.charCount(cp); + } + if (cc == digitStr.length()) { + // One code point in this digit. + // If it is 1 UTF-16 code unit long, set it in tmpDigits. + if (cc == 1) { + tmpDigits[i] = (char) cp; + } else { + tmpDigits = null; + } + // Check for validity of tmpCodePointZero. + if (i == 0) { + tmpCodePointZero = cp; + } else if (cp != tmpCodePointZero + i) { + tmpCodePointZero = -1; + } + } else { + // More than one code point in this digit. + // codePointZero and tmpDigits are going to be invalid. + tmpCodePointZero = -1; tmpDigits = null; } } this.digitStrings = tmpDigitStrings; + this.codePointZero = tmpCodePointZero; if (tmpDigits == null) { // fallback to the default digit chars @@ -1545,8 +1592,11 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { serialVersionOnStream = currentSerialVersion; - // recreate - currency = Currency.getInstance(intlCurrencySymbol); + // recreate + currency = Currency.getInstance(intlCurrencySymbol); + + // Refresh digitStrings in order to populate codePointZero + setDigitStrings(digitStrings); } /** @@ -1569,6 +1619,22 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { */ private String digitStrings[]; + /** + * Dealing with code points is faster than dealing with strings when formatting. Because of + * this, we maintain a value containing the zero code point that is used whenever digitStrings + * represents a sequence of ten code points in order. + * + *

If the value stored here is positive, it means that the code point stored in this value + * corresponds to the digitStrings array, and zeroCodePoint can be used instead of the + * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does + * *not* contain a sequence of code points, and it must be used directly. + * + *

It is assumed that zeroCodePoint always shadows the value in digitStrings. zeroCodePoint + * should never be set directly; rather, it should be updated only when digitStrings mutates. + * That is, the flow of information is digitStrings -> zeroCodePoint, not the other way. + */ + private transient int codePointZero; + /** * Character used for thousands separator. * diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java index 56688a236b9..caf78abcbe9 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java @@ -269,6 +269,7 @@ public class IntlTestDecimalFormatSymbols extends com.ibm.icu.dev.test.TestFmwk "\uD801\uDCA0", "\uD801\uDCA1", "\uD801\uDCA2", "\uD801\uDCA3", "\uD801\uDCA4", "\uD801\uDCA5", "\uD801\uDCA6", "\uD801\uDCA7", "\uD801\uDCA8", "\uD801\uDCA9" }; + final String[] differentDigitStrings = {"0", "b", "3", "d", "5", "f", "7", "h", "9", "j"}; DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.ENGLISH); @@ -276,6 +277,9 @@ public class IntlTestDecimalFormatSymbols extends com.ibm.icu.dev.test.TestFmwk if (!Arrays.equals(symbols.getDigitStrings(), osmanyaDigitStrings)) { errln("ERROR: Osmanya digits (supplementary) should be set"); } + if (Character.codePointAt(osmanyaDigitStrings[0], 0) != symbols.getCodePointZero()) { + errln("ERROR: Code point zero be Osmanya code point zero"); + } if (defZero != symbols.getZeroDigit()) { errln("ERROR: Zero digit should be 0"); } @@ -283,10 +287,21 @@ public class IntlTestDecimalFormatSymbols extends com.ibm.icu.dev.test.TestFmwk errln("ERROR: Char digits should be Latin digits"); } + symbols.setDigitStrings(differentDigitStrings); + if (!Arrays.equals(symbols.getDigitStrings(), differentDigitStrings)) { + errln("ERROR: Different digits should be set"); + } + if (-1 != symbols.getCodePointZero()) { + errln("ERROR: Code point zero should be invalid"); + } + // Reset digits to Latin symbols.setZeroDigit(defZero); if (!Arrays.equals(symbols.getDigitStrings(), defDigitStrings)) { errln("ERROR: Latin digits should be set" + symbols.getDigitStrings()[0]); } + if (defZero != symbols.getCodePointZero()) { + errln("ERROR: Code point zero be ASCII 0"); + } } } -- 2.40.0