From f7e4a8f2051f38b0651239bff523ac027ee8ab05 Mon Sep 17 00:00:00 2001
From: Craig Cornelius
* Standalone utility class providing UTF16 character conversions and indexing conversions.
@@ -23,27 +25,27 @@ package com.ibm.icu.text;
* Examples:
*
* The following examples illustrate use of some of these methods.
- *
+ *
*
* To find the UTF-32 length of a string, use:
- *
+ *
*
* To find the UTF-32 length of a string, use:
- *
+ *
*
* To find the UTF-32 length of a substring, use:
- *
+ *
*
* The offset argument must be greater than or equal to 0, and less than or equal to the length
* of source.
- *
+ *
* @param target String buffer to insert to
* @param offset16 Offset which char32 will be inserted in
* @param char32 Codepoint to be inserted
@@ -1429,7 +1431,7 @@ public final class UTF16 {
*
* The offset argument must be greater than or equal to 0, and less than or equal to the limit.
- *
+ *
* @param target Char array to insert to
* @param limit End index of the char array, limit <= target.length
* @param offset16 Offset which char32 will be inserted in
@@ -1458,7 +1460,7 @@ public final class UTF16 {
/**
* Removes the codepoint at the specified position in this target (shortening target by 1
* character if the codepoint is a non-supplementary, 2 otherwise).
- *
+ *
* @param target String buffer to remove codepoint from
* @param offset16 Offset which the codepoint will be removed
* @return a reference to target
@@ -1483,7 +1485,7 @@ public final class UTF16 {
/**
* Removes the codepoint at the specified position in this target (shortening target by 1
* character if the codepoint is a non-supplementary, 2 otherwise).
- *
+ *
* @param target String buffer to remove codepoint from
* @param limit End index of the char array, limit <= target.length
* @param offset16 Offset which the codepoint will be removed
@@ -1523,7 +1525,7 @@ public final class UTF16 {
*
* // iteration forwards: Original
* for (int i = 0; i < s.length(); ++i) {
* char ch = s.charAt(i);
* doSomethingWith(ch);
* }
- *
+ *
* // iteration forwards: Changes for UTF-32
* int ch;
* for (int i = 0; i < s.length(); i += UTF16.getCharCount(ch)) {
* ch = UTF16.charAt(s, i);
* doSomethingWith(ch);
* }
- *
+ *
* // iteration backwards: Original
* for (int i = s.length() - 1; i >= 0; --i) {
* char ch = s.charAt(i);
* doSomethingWith(ch);
* }
- *
+ *
* // iteration backwards: Changes for UTF-32
* int ch;
* for (int i = s.length() - 1; i > 0; i -= UTF16.getCharCount(ch)) {
@@ -51,7 +53,7 @@ package com.ibm.icu.text;
* doSomethingWith(ch);
* }
*
- *
+ *
* Notes:
*
*
- *
+ *
* @author Mark Davis, with help from Markus Scherer
* @stable ICU 2.1
*/
@@ -87,7 +89,7 @@ public final class UTF16 {
* Value returned in {@link #bounds(String, int) bounds()}.
* These values are chosen specifically so that it actually represents the position of the
* character [offset16 - (value >> 2), offset16 + (value & 3)]
- *
+ *
* @stable ICU 2.1
*/
public static final int SINGLE_CHAR_BOUNDARY = 1, LEAD_SURROGATE_BOUNDARY = 2,
@@ -95,63 +97,63 @@ public final class UTF16 {
/**
* The lowest Unicode code point value.
- *
+ *
* @stable ICU 2.1
*/
public static final int CODEPOINT_MIN_VALUE = 0;
/**
* The highest Unicode code point value (scalar value) according to the Unicode Standard.
- *
+ *
* @stable ICU 2.1
*/
public static final int CODEPOINT_MAX_VALUE = 0x10ffff;
/**
* The minimum value for Supplementary code points
- *
+ *
* @stable ICU 2.1
*/
public static final int SUPPLEMENTARY_MIN_VALUE = 0x10000;
/**
* Lead surrogate minimum value
- *
+ *
* @stable ICU 2.1
*/
public static final int LEAD_SURROGATE_MIN_VALUE = 0xD800;
/**
* Trail surrogate minimum value
- *
+ *
* @stable ICU 2.1
*/
public static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00;
/**
* Lead surrogate maximum value
- *
+ *
* @stable ICU 2.1
*/
public static final int LEAD_SURROGATE_MAX_VALUE = 0xDBFF;
/**
* Trail surrogate maximum value
- *
+ *
* @stable ICU 2.1
*/
public static final int TRAIL_SURROGATE_MAX_VALUE = 0xDFFF;
/**
* Surrogate minimum value
- *
+ *
* @stable ICU 2.1
*/
public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;
/**
* Maximum surrogate value
- *
+ *
* @stable ICU 2.1
*/
public static final int SURROGATE_MAX_VALUE = TRAIL_SURROGATE_MAX_VALUE;
@@ -206,7 +208,7 @@ public final class UTF16 {
* on the return value. If the char retrieved is part of a surrogate pair, its supplementary
* character will be returned. If a complete supplementary character is not found the incomplete
* character will be returned
- *
+ *
* @param source Array of UTF-16 chars
* @param offset16 UTF-16 offset to the start of the character.
* @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
@@ -260,7 +262,7 @@ public final class UTF16 {
* on the return value. If the char retrieved is part of a surrogate pair, its supplementary
* character will be returned. If a complete supplementary character is not found the incomplete
* character will be returned
- *
+ *
* @param source Array of UTF-16 chars
* @param offset16 UTF-16 offset to the start of the character.
* @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
@@ -316,7 +318,7 @@ public final class UTF16 {
* on the return value. If the char retrieved is part of a surrogate pair, its supplementary
* character will be returned. If a complete supplementary character is not found the incomplete
* character will be returned
- *
+ *
* @param source UTF-16 chars string buffer
* @param offset16 UTF-16 offset to the start of the character.
* @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
@@ -366,7 +368,7 @@ public final class UTF16 {
* on the return value. If the char retrieved is part of a surrogate pair, its supplementary
* character will be returned. If a complete supplementary character is not found the incomplete
* character will be returned
- *
+ *
* @param source Array of UTF-16 chars
* @param start Offset to substring in the source array for analyzing
* @param limit Offset to substring in the source array for analyzing
@@ -419,7 +421,7 @@ public final class UTF16 {
* on the return value. If the char retrieved is part of a surrogate pair, its supplementary
* character will be returned. If a complete supplementary character is not found the incomplete
* character will be returned
- *
+ *
* @param source UTF-16 chars string buffer
* @param offset16 UTF-16 offset to the start of the character.
* @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries
@@ -465,7 +467,7 @@ public final class UTF16 {
* Determines how many chars this char32 requires. If a validity check is required, use Lead
@@ -75,7 +77,7 @@ package com.ibm.icu.text;
* compiler doesn't fold static final methods. Since surrogate pairs will form an exceeding small
* percentage of all the text in the world, the singleton case should always be optimized for.
* isLegal()
* on char32 before calling.
- *
+ *
* @param char32 The input codepoint.
* @return 2 if is in supplementary space, otherwise 1.
* @stable ICU 2.1
@@ -479,7 +481,7 @@ public final class UTF16 {
/**
* Returns the type of the boundaries around the char at offset16. Used for random access.
- *
+ *
* @param source Text to analyse
* @param offset16 UTF-16 offset
* @return
@@ -515,7 +517,7 @@ public final class UTF16 {
/**
* Returns the type of the boundaries around the char at offset16. Used for random access.
- *
+ *
* @param source String buffer to analyse
* @param offset16 UTF16 offset
* @return
@@ -553,7 +555,7 @@ public final class UTF16 {
* Returns the type of the boundaries around the char at offset16. Used for random access. Note
* that the boundaries are determined with respect to the subarray, hence the char array
* {0xD800, 0xDC00} has the result SINGLE_CHAR_BOUNDARY for start = offset16 = 0 and limit = 1.
- *
+ *
* @param source Char array to analyse
* @param start Offset to substring in the source array for analyzing
* @param limit Offset to substring in the source array for analyzing
@@ -595,7 +597,7 @@ public final class UTF16 {
/**
* Determines whether the code value is a surrogate.
- *
+ *
* @param char16 The input character.
* @return true If the input character is a surrogate.
* @stable ICU 2.1
@@ -606,7 +608,7 @@ public final class UTF16 {
/**
* Determines whether the character is a trail surrogate.
- *
+ *
* @param char16 The input character.
* @return true If the input character is a trail surrogate.
* @stable ICU 2.1
@@ -617,7 +619,7 @@ public final class UTF16 {
/**
* Determines whether the character is a lead surrogate.
- *
+ *
* @param char16 The input character.
* @return true If the input character is a lead surrogate
* @stable ICU 2.1
@@ -630,7 +632,7 @@ public final class UTF16 {
* Returns the lead surrogate. If a validity check is required, use
* isLegal()
on char32
* before calling.
- *
+ *
* @param char32 The input character.
* @return lead surrogate if the getCharCount(ch) is 2;
* and 0 otherwise (note: 0 is not a valid lead surrogate).
@@ -647,7 +649,7 @@ public final class UTF16 {
* Returns the trail surrogate. If a validity check is required, use
* isLegal()
on char32
* before calling.
- *
+ *
* @param char32 The input character.
* @return the trail surrogate if the getCharCount(ch) is 2;
* otherwise the character itself
@@ -664,7 +666,7 @@ public final class UTF16 {
* Convenience method corresponding to String.valueOf(char). Returns a one or two char string
* containing the UTF-32 value in UTF16 format. If a validity check is required, use
* {@link com.ibm.icu.lang.UCharacter#isLegal(int)} on char32 before calling.
- *
+ *
* @param char32 The input character.
* @return string value of char32 in UTF16 format
* @exception IllegalArgumentException Thrown if char32 is a invalid codepoint.
@@ -684,7 +686,7 @@ public final class UTF16 {
* required, use {@link com.ibm.icu.lang.UCharacter#isLegal(int)} on the
* codepoint at offset16 before calling. The result returned will be a newly created String
* obtained by calling source.substring(..) with the appropriate indexes.
- *
+ *
* @param source The input string.
* @param offset16 The UTF16 index to the codepoint in source
* @return string value of char32 in UTF16 format
@@ -708,7 +710,7 @@ public final class UTF16 {
* is required, use {@link com.ibm.icu.lang.UCharacter#isLegal(int)} on
* the codepoint at offset16 before calling. The result returned will be a newly created String
* obtained by calling source.substring(..) with the appropriate indexes.
- *
+ *
* @param source The input string buffer.
* @param offset16 The UTF16 index to the codepoint in source
* @return string value of char32 in UTF16 format
@@ -734,7 +736,7 @@ public final class UTF16 {
* {@link com.ibm.icu.lang.UCharacter#isLegal(int)} on the codepoint at
* offset16 before calling. The result returned will be a newly created String containing the
* relevant characters.
- *
+ *
* @param source The input char array.
* @param start Start index of the subarray
* @param limit End index of the subarray
@@ -755,7 +757,7 @@ public final class UTF16 {
/**
* Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See
* the {@link UTF16 class description} for notes on roundtripping.
- *
+ *
* @param source The UTF-16 string
* @param offset32 UTF-32 offset
* @return UTF-16 offset
@@ -787,7 +789,7 @@ public final class UTF16 {
/**
* Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See
* the {@link UTF16 class description} for notes on roundtripping.
- *
+ *
* @param source The UTF-16 string buffer
* @param offset32 UTF-32 offset
* @return UTF-16 offset
@@ -819,7 +821,7 @@ public final class UTF16 {
/**
* Returns the UTF-16 offset that corresponds to a UTF-32 offset. Used for random access. See
* the {@link UTF16 class description} for notes on roundtripping.
- *
+ *
* @param source The UTF-16 char array whose substring is to be analysed
* @param start Offset of the substring to be analysed
* @param limit Offset of the substring to be analysed
@@ -858,11 +860,11 @@ public final class UTF16 {
* of the lead of the pair is returned.
*
* len32 = countCodePoint(source, source.length());
*
- *
+ *
* @param source Text to analyse
* @param offset16 UTF-16 offset < source text length.
* @return UTF-32 offset
@@ -909,7 +911,7 @@ public final class UTF16 {
* of the lead of the pair is returned.
*
* len32 = countCodePoint(source);
*
@@ -960,7 +962,7 @@ public final class UTF16 {
* of the lead of the pair is returned.
*
* len32 = countCodePoint(source, start, limit);
*
@@ -1010,7 +1012,7 @@ public final class UTF16 {
* Append a single UTF-32 value to the end of a StringBuffer. If a validity check is required,
* use {@link com.ibm.icu.lang.UCharacter#isLegal(int)} on char32 before
* calling.
- *
+ *
* @param target The buffer to append to
* @param char32 Value to append.
* @return the updated StringBuffer
@@ -1036,7 +1038,7 @@ public final class UTF16 {
/**
* Cover JDK 1.5 APIs. Append the code point to the buffer and return the buffer as a
* convenience.
- *
+ *
* @param target The buffer to append to
* @param cp The code point to append
* @return the updated StringBuffer
@@ -1049,7 +1051,7 @@ public final class UTF16 {
/**
* Adds a codepoint to offset16 position of the argument char array.
- *
+ *
* @param target Char array to be append with the new code point
* @param limit UTF16 offset which the codepoint will be appended.
* @param char32 Code point to be appended
@@ -1075,7 +1077,7 @@ public final class UTF16 {
/**
* Number of codepoints in a UTF16 String
- *
+ *
* @param source UTF16 string
* @return number of codepoint in string
* @stable ICU 2.1
@@ -1089,7 +1091,7 @@ public final class UTF16 {
/**
* Number of codepoints in a UTF16 String buffer
- *
+ *
* @param source UTF16 string buffer
* @return number of codepoint in string
* @stable ICU 2.1
@@ -1103,7 +1105,7 @@ public final class UTF16 {
/**
* Number of codepoints in a UTF16 char array substring
- *
+ *
* @param source UTF16 char array
* @param start Offset of the substring
* @param limit Offset of the substring
@@ -1121,7 +1123,7 @@ public final class UTF16 {
/**
* Set a code point into a UTF16 position. Adjusts target according if we are replacing a
* non-supplementary codepoint with a supplementary and vice versa.
- *
+ *
* @param target Stringbuffer
* @param offset16 UTF16 position to insert into
* @param char32 Code point
@@ -1152,7 +1154,7 @@ public final class UTF16 {
/**
* Set a code point into a UTF16 position in a char array. Adjusts target according if we are
* replacing a non-supplementary codepoint with a supplementary and vice versa.
- *
+ *
* @param target char array
* @param limit numbers of valid chars in target, different from target.length. limit counts the
* number of chars in target that represents a string, not the size of array target.
@@ -1218,7 +1220,7 @@ public final class UTF16 {
/**
* Shifts offset16 by the argument number of codepoints
- *
+ *
* @param source string
* @param offset16 UTF16 position to shift
* @param shift32 number of codepoints to shift
@@ -1272,7 +1274,7 @@ public final class UTF16 {
/**
* Shifts offset16 by the argument number of codepoints
- *
+ *
* @param source String buffer
* @param offset16 UTF16 position to shift
* @param shift32 Number of codepoints to shift
@@ -1326,7 +1328,7 @@ public final class UTF16 {
/**
* Shifts offset16 by the argument number of codepoints within a subarray.
- *
+ *
* @param source Char array
* @param start Position of the subarray to be performed on
* @param limit Position of the subarray to be performed on
@@ -1401,7 +1403,7 @@ public final class UTF16 {
*
Comparison is case insensitive, strings are folded using default mappings defined in * Unicode data file CaseFolding.txt, before comparison. - * + * * @stable ICU 2.4 */ public static final int FOLD_CASE_DEFAULT = 0; @@ -2385,7 +2387,7 @@ public final class UTF16 { * *
Comparison is case insensitive, strings are folded using modified mappings defined in * Unicode data file CaseFolding.txt, before comparison. - * + * * @stable ICU 2.4 * @see com.ibm.icu.lang.UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I */ @@ -2398,7 +2400,7 @@ public final class UTF16 { /** * Sets the comparison mode to code point compare if flag is true. Otherwise comparison mode * is set to code unit compare - * + * * @param flag True for code point compare, false for code unit compare * @stable ICU 2.4 */ @@ -2413,7 +2415,7 @@ public final class UTF16 { /** * Sets the Comparator to case-insensitive comparison mode if argument is true, otherwise * case sensitive comparison mode if set to false. - * + * * @param ignorecase True for case-insitive comparison, false for case sensitive comparison * @param foldcaseoption FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I. This option is used only * when ignorecase is set to true. If ignorecase is false, this option is @@ -2434,7 +2436,7 @@ public final class UTF16 { /** * Checks if the comparison mode is code point compare. - * + * * @return true for code point compare, false for code unit compare * @stable ICU 2.4 */ @@ -2444,7 +2446,7 @@ public final class UTF16 { /** * Checks if Comparator is in the case insensitive mode. - * + * * @return true if Comparator performs case insensitive comparison, false otherwise * @stable ICU 2.4 */ @@ -2454,7 +2456,7 @@ public final class UTF16 { /** * Gets the fold case options set in Comparator to be used with case insensitive comparison. - * + * * @return either FOLD_CASE_DEFAULT or FOLD_CASE_EXCLUDE_SPECIAL_I * @see #FOLD_CASE_DEFAULT * @see #FOLD_CASE_EXCLUDE_SPECIAL_I @@ -2468,7 +2470,7 @@ public final class UTF16 { /** * Compare two strings depending on the options selected during construction. - * + * * @param a first source string. * @param b second source string. * @return 0 returned if a == b. If a < b, a negative value is returned. Otherwise if a > b, @@ -2476,8 +2478,9 @@ public final class UTF16 { * @exception ClassCastException thrown when either a or b is not a String object * @stable ICU 4.4 */ + @Override public int compare(String a, String b) { - if (a == b) { + if (Utility.sameObjects(a, b)) { return 0; } if (a == null) { @@ -2521,7 +2524,7 @@ public final class UTF16 { /** * Compares case insensitive. This is a direct port of ICU4C, to make maintainence life * easier. - * + * * @param s1 * first string to compare * @param s2 @@ -2536,7 +2539,7 @@ public final class UTF16 { /** * Compares case sensitive. This is a direct port of ICU4C, to make maintainence life * easier. - * + * * @param s1 * first string to compare * @param s2 @@ -2619,7 +2622,7 @@ public final class UTF16 { } // at this point, len = 2 - int cp = Character.codePointAt(s, 0); + int cp = Character.codePointAt(s, 0); if (cp > 0xFFFF) { // is surrogate pair return cp; } @@ -2691,7 +2694,7 @@ public final class UTF16 { *
* The result is a string whose length is 1 for non-supplementary code points, 2 otherwise. *
- * + * * @param ch * code point * @return string representation of the code point diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java index a8dea71df8e..29603103c00 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java @@ -25,6 +25,7 @@ import java.util.ResourceBundle; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.LocaleUtility; +import com.ibm.icu.impl.Utility; import com.ibm.icu.lang.UScript; import com.ibm.icu.text.RuleBasedTransliterator.Data; import com.ibm.icu.util.CaseInsensitiveString; @@ -145,7 +146,7 @@ class TransliteratorRegistry { } public void reset() { - if (spec != top) { // [sic] pointer comparison + if (!Utility.sameObjects(spec, top)) { spec = top; isSpecLocale = (res != null); setupNext(); @@ -167,7 +168,7 @@ class TransliteratorRegistry { } } else { // Fallback to the script, which may be null - if (nextSpec != scriptName) { + if (!Utility.sameObjects(nextSpec, scriptName)) { nextSpec = scriptName; } else { nextSpec = null; -- 2.40.0