ICU-11900 UCharacter use java.lang.Character code point constants & methods, not...

author Markus Scherer <markus.icu@gmail.com>

Wed, 9 Sep 2015 00:13:06 +0000 (00:13 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Wed, 9 Sep 2015 00:13:06 +0000 (00:13 +0000)
author Markus Scherer <markus.icu@gmail.com>
Wed, 9 Sep 2015 00:13:06 +0000 (00:13 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Wed, 9 Sep 2015 00:13:06 +0000 (00:13 +0000)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/BMPSet.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/BMPSet.java

index 0125da650aa5e55430e621b6963a1523792f1943..fc7daad604281f0f2afb3ca89456cd347507a9b9 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/BMPSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/BMPSet.java
@@ -1,7 +1,7 @@
  /*
   ******************************************************************************
   *
- *   Copyright (C) 2009-2014, International Business Machines
+ *   Copyright (C) 2009-2015, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   *
   ******************************************************************************
@@ -170,7 +170,7 @@ public final class BMPSet {
                      }
                  } else {
                      // surrogate pair
-                    int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+                    int supplementary = Character.toCodePoint(c, c2);
                      if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
                          break;
                      }
@@ -209,7 +209,7 @@ public final class BMPSet {
                      }
                  } else {
                      // surrogate pair
-                    int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+                    int supplementary = Character.toCodePoint(c, c2);
                      if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
                          break;
                      }
@@ -266,7 +266,7 @@ public final class BMPSet {
                      }
                  } else {
                      // surrogate pair
-                    int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+                    int supplementary = Character.toCodePoint(c2, c);
                      if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
                          break;
                      }
@@ -306,7 +306,7 @@ public final class BMPSet {
                      }
                  } else {
                      // surrogate pair
-                    int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+                    int supplementary = Character.toCodePoint(c2, c);
                      if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
                          break;
                      }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java

index 8a62218b8c29a27b9d19ad72376002a748e9baf9..acc0a9937522bdf2ae3041a00cd98710cb43a373 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java
@@ -1,7 +1,7 @@
  /*
   *******************************************************************************
- * Copyright (C) 1996-2010, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
   *******************************************************************************
   */
  package com.ibm.icu.impl;
@@ -108,9 +108,7 @@ public class ReplaceableUCharacterIterator extends UCharacterIterator {
              
              if(UTF16.isTrailSurrogate((char)ch2)){
                  // we found a surrogate pair
-                return UCharacterProperty.getRawSupplementary(
-                                                         (char)ch,(char)ch2
-                                                             );
+                return Character.toCodePoint((char)ch, (char)ch2);
              }
          }
          return ch;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java

index 13594a8017a799fd658bc8c4404f4c3a3c7a94df..a891a021831d130bc3c01a06706e0d8bbd9e105e 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java
@@ -303,9 +303,7 @@ public class TrieIterator implements RangeValueIterator
                  // this is not a simple addition of 
                  // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
                  // that we might have moved some of the codepoints
-                m_nextCodepoint_ = UCharacterProperty.getRawSupplementary(
-                                     (char)nextLead, 
-                                     (char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
+                m_nextCodepoint_ = Character.toCodePoint((char)nextLead, (char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
                  continue;
              }
              if (m_trie_.m_dataManipulate_ == null) {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java

index 0df49f5b1d05621e3bb26ab61ef4ec30d7056dfd..fc28c9d44fe1d96085af01c1bf6db267966deca3 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java
@@ -645,19 +645,6 @@ public final class UCharacterProperty
          }
      }
  
-    /**
-    * Forms a supplementary code point from the argument character<br>
-    * Note this is for internal use hence no checks for the validity of the
-    * surrogate characters are done
-    * @param lead lead surrogate character
-    * @param trail trailing surrogate character
-    * @return code point of the supplementary character
-    */
-    public static int getRawSupplementary(char lead, char trail)
-    {
-        return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
-    }
-
      /**
       * <p>
       * Unicode property names and property value names are compared
@@ -972,20 +959,6 @@ public final class UCharacterProperty
      */
      private static final String DATA_FILE_NAME_ = "uprops.icu";
  
-    /**
-    * Shift value for lead surrogate to form a supplementary character.
-    */
-    private static final int LEAD_SURROGATE_SHIFT_ = 10;
-    /**
-    * Offset to add to combined surrogate pair to avoid masking.
-    */
-    private static final int SURROGATE_OFFSET_ =
-                           UTF16.SUPPLEMENTARY_MIN_VALUE -
-                           (UTF16.SURROGATE_MIN_VALUE <<
-                           LEAD_SURROGATE_SHIFT_) -
-                           UTF16.TRAIL_SURROGATE_MIN_VALUE;
-
-
      // property data constants -------------------------------------------------
  
      /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java

index b682f952b3e32b0a2e8e2bdb9be87707f25aa556..ead5fc92c2a4a6bb33e460be2817fa6bc493b10b 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java
@@ -1,7 +1,7 @@
  /*
   ******************************************************************************
   *
- *   Copyright (C) 2009-2014, International Business Machines
+ *   Copyright (C) 2009-2015, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   *
   ******************************************************************************
@@ -970,7 +970,7 @@ public class UnicodeSetStringSpan {
          if (c >= 0xd800 && c <= 0xdbff && length >= 2) {
              char c2 = s.charAt(start + 1);
              if (com.ibm.icu.text.UTF16.isTrailSurrogate(c2)) {
-                int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+                int supplementary = Character.toCodePoint(c, c2);
                  return set.contains(supplementary) ? 2 : -2;
              }
          }
@@ -982,7 +982,7 @@ public class UnicodeSetStringSpan {
          if (c >= 0xdc00 && c <= 0xdfff && length >= 2) {
              char c2 = s.charAt(length - 2);
              if (com.ibm.icu.text.UTF16.isLeadSurrogate(c2)) {
-                int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+                int supplementary = Character.toCodePoint(c2, c);
                  return set.contains(supplementary) ? 2 : -2;
              }
          }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java

index ad20351137134be416e4ddb771dda7555f9f4aec..bc8df257c1ec89085edf6f9ee0d052e24363d194 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java
@@ -866,8 +866,7 @@ public final class Utility {
                  }
                  if (UTF16.isTrailSurrogate((char) c)) {
                      offset = ahead;
-                    result = UCharacterProperty.getRawSupplementary(
-                            (char) result, (char) c);
+                    result = Character.toCodePoint((char) result, (char) c);
                  }
              }
              offset16[0] = offset;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java

index 9d4991fa36ad100b5aa12153e861a3e770791046..e458ee25cde3a6db0ddf9ddbb1563049acc3819c 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
@@ -26,7 +26,6 @@ import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
  import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
  import com.ibm.icu.text.BreakIterator;
  import com.ibm.icu.text.Normalizer2;
-import com.ibm.icu.text.UTF16;
  import com.ibm.icu.util.RangeValueIterator;
  import com.ibm.icu.util.ULocale;
  import com.ibm.icu.util.ValueIterator;
@@ -35,14 +34,17 @@ import com.ibm.icu.util.VersionInfo;
  /**
   * {@icuenhanced java.lang.Character}.{@icu _usage_}
   *
- * <p>The UCharacter class provides extensions to the
- * <a href="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html">
- * java.lang.Character</a> class. These extensions provide support for
- * more Unicode properties and together with the <a href=../text/UTF16.html>UTF16</a>
- * class, provide support for supplementary characters (those with code
- * points above U+FFFF).
+ * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
+ * These extensions provide support for more Unicode properties.
   * Each ICU release supports the latest version of Unicode available at that time.
   *
+ * <p>For some time before Java 5 added support for supplementary Unicode code points,
+ * the ICU UCharacter class and many other ICU classes already supported them.
+ * Some UCharacter methods and constants were widened slightly differently than
+ * how the Character class methods and constants were widened later.
+ * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
+ * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
+ *
   * <p>Code points are represented in these API using ints. While it would be
   * more convenient in Java to have a separate primitive datatype for them,
   * ints suffice in the meantime.
@@ -3436,26 +3438,31 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      // public data members -----------------------------------------------
  
      /**
-     * The lowest Unicode code point value.
+     * The lowest Unicode code point value, constant 0.
+     * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
+     *
       * @stable ICU 2.1
       */
-    public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
+    public static final int MIN_VALUE = Character.MIN_CODE_POINT;
  
      /**
-     * The highest Unicode code point value (scalar value) according to the
-     * Unicode Standard.
-     * This is a 21-bit value (21 bits, rounded up).<br>
-     * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE
+     * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
+     * Same as {@link Character#MAX_CODE_POINT}.
+     *
+     * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
+     * which is still a char with the value U+FFFF.
+     *
       * @stable ICU 2.1
       */
-    public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
+    public static final int MAX_VALUE = Character.MAX_CODE_POINT;
  
      /**
-     * The minimum value for Supplementary code points
+     * The minimum value for Supplementary code points, constant U+10000.
+     * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
+     *
       * @stable ICU 2.1
       */
-    public static final int SUPPLEMENTARY_MIN_VALUE =
-            UTF16.SUPPLEMENTARY_MIN_VALUE;
+    public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
  
      /**
       * Unicode value used when translating into Unicode encoding form and there
@@ -4039,12 +4046,11 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
  
      /**
       * Converts argument code point and returns a String object representing
-     * the code point's value in UTF16 format.
-     * The result is a string whose length is 1 for non-supplementary code
-     * points, 2 otherwise.<br>
-     * com.ibm.ibm.icu.UTF16 can be used to parse Strings generated by this
-     * function.<br>
-     * Up-to-date Unicode implementation of java.lang.Character.toString()
+     * the code point's value in UTF-16 format.
+     * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
+     *
+     * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
+     *
       * @param ch code point
       * @return string representation of the code point, null if code point is not
       *         defined in unicode
@@ -4060,10 +4066,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
              return String.valueOf((char)ch);
          }
  
-        StringBuilder result = new StringBuilder();
-        result.append(UTF16.getLeadSurrogate(ch));
-        result.append(UTF16.getTrailSurrogate(ch));
-        return result.toString();
+        return new String(Character.toChars(ch));
      }
  
      /**
@@ -4282,10 +4285,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
          if (ch < MIN_VALUE) {
              return false;
          }
-        if (ch < UTF16.SURROGATE_MIN_VALUE) {
+        if (ch < Character.MIN_SURROGATE) {
              return true;
          }
-        if (ch <= UTF16.SURROGATE_MAX_VALUE) {
+        if (ch <= Character.MAX_SURROGATE) {
              return false;
          }
          if (UCharacterUtility.isNonCharacter(ch)) {
@@ -4311,15 +4314,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      {
          int size = str.length();
          int codepoint;
-        for (int i = 0; i < size; i ++)
+        for (int i = 0; i < size; i += Character.charCount(codepoint))
          {
-            codepoint = UTF16.charAt(str, i);
+            codepoint = str.codePointAt(i);
              if (!isLegal(codepoint)) {
                  return false;
              }
-            if (isSupplementary(codepoint)) {
-                i ++;
-            }
          }
          return true;
      }
@@ -4363,8 +4363,8 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
          }
          int cp;
          StringBuilder sb = new StringBuilder();
-        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
-            cp = UTF16.charAt(s,i);
+        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
+            cp = s.codePointAt(i);
              if (i != 0) sb.append(separator);
              sb.append(UCharacter.getName(cp));
          }
@@ -4700,28 +4700,30 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
  
  
      /**
-     * {@icu} Returns a code point corresponding to the two UTF16 characters.
+     * {@icu} Returns a code point corresponding to the two surrogate code units.
+     *
       * @param lead the lead char
       * @param trail the trail char
       * @return code point if surrogate characters are valid.
-     * @exception IllegalArgumentException thrown when argument characters do
-     *            not form a valid codepoint
+     * @exception IllegalArgumentException thrown when the code units do
+     *            not form a valid code point
       * @stable ICU 2.1
       */
      public static int getCodePoint(char lead, char trail)
      {
-        if (UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail)) {
-            return UCharacterProperty.getRawSupplementary(lead, trail);
+        if (Character.isSurrogatePair(lead, trail)) {
+            return Character.toCodePoint(lead, trail);
          }
          throw new IllegalArgumentException("Illegal surrogate characters");
      }
  
      /**
-     * {@icu} Returns the code point corresponding to the UTF16 character.
-     * @param char16 the UTF16 character
+     * {@icu} Returns the code point corresponding to the BMP code point.
+     *
+     * @param char16 the BMP code point
       * @return code point if argument is a valid character.
       * @exception IllegalArgumentException thrown when char16 is not a valid
-     *            codepoint
+     *            code point
       * @stable ICU 2.1
       */
      public static int getCodePoint(char char16)
@@ -4753,7 +4755,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
           * If the limit parameter is negative or past the string, then the
           * string length is restored as the iteration limit.
           *
-         * This limit does not affect the next() function which always
+         * <p>This limit does not affect the next() function which always
           * iterates to the very end of the string.
           *
           * @param lim The iteration limit.
@@ -4776,33 +4778,19 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
          /**
           * Iterate forward through the string to fetch the next code point
           * to be case-mapped, and set the context indexes for it.
-         * Performance optimization, to save on function calls and redundant
-         * tests. Combines UTF16.charAt(), UTF16.getCharCount(), and setIndex().
           *
-         * When the iteration limit is reached (and -1 is returned),
+         * <p>When the iteration limit is reached (and -1 is returned),
           * getCPStart() will be at the iteration limit.
           *
-         * Iteration with next() does not affect the position for nextCaseMapCP().
+         * <p>Iteration with next() does not affect the position for nextCaseMapCP().
           *
           * @return The next code point to be case-mapped, or <0 when the iteration is done.
           */
          public int nextCaseMapCP() {
              cpStart=cpLimit;
              if(cpLimit<limit) {
-                int c=s.charAt(cpLimit++);
-                if(UTF16.LEAD_SURROGATE_MIN_VALUE<=c || c<=UTF16.TRAIL_SURROGATE_MAX_VALUE) {
-                    char c2;
-                    if( c<=UTF16.LEAD_SURROGATE_MAX_VALUE && cpLimit<limit &&
-                            UTF16.TRAIL_SURROGATE_MIN_VALUE<=(c2=s.charAt(cpLimit)) &&
-                            c2<=UTF16.TRAIL_SURROGATE_MAX_VALUE
-                            ) {
-                        // supplementary code point
-                        ++cpLimit;
-                        c=UCharacterProperty.getRawSupplementary((char)c, c2);
-                        // else unpaired surrogate code point
-                    }
-                    // else BMP code point
-                }
+                int c=s.codePointAt(cpLimit);
+                cpLimit+=Character.charCount(c);
                  return c;
              } else {
                  return -1;
@@ -4847,12 +4835,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
              int c;
  
              if(dir>0 && index<s.length()) {
-                c=UTF16.charAt(s, index);
-                index+=UTF16.getCharCount(c);
+                c=s.codePointAt(index);
+                index+=Character.charCount(c);
                  return c;
              } else if(dir<0 && index>0) {
-                c=UTF16.charAt(s, index-1);
-                index-=UTF16.getCharCount(c);
+                c=s.codePointBefore(index);
+                index-=Character.charCount(c);
                  return c;
              }
              return -1;
@@ -5172,14 +5160,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
  
                              int titleLimit=iter.getCPLimit();
                              if(titleLimit<index) {
-                                // TODO: With Java 5, this would want to be
-                                // result.append(str, titleLimit, index);
-                                String appendStr = str.substring(titleLimit,index);
                                  /* Special Case - Dutch IJ Titlecasing */
-                                if ( isDutch && c == 0x0049 && appendStr.startsWith("j")) {
-                                    appendStr = "J" + appendStr.substring(1);
+                                if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
+                                    result.append('J').append(str, titleLimit + 1, index);
+                                } else {
+                                    result.append(str, titleLimit, index);
                                  }
-                                result.append(appendStr);
                              }
                              iter.moveToLimit();
                              break;
@@ -5428,8 +5414,8 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
  
          length = str.length();
          for(i=0; i<length;) {
-            c=UTF16.charAt(str, i);
-            i+=UTF16.getCharCount(c);
+            c=str.codePointAt(i);
+            i+=Character.charCount(c);
              c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
  
              /* decode the result */
@@ -5807,17 +5793,17 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
          switch (propertyEnum) {
          case UProperty.AGE: return getAge(codepoint).toString();
          case UProperty.ISO_COMMENT: return getISOComment(codepoint);
-        case UProperty.BIDI_MIRRORING_GLYPH: return UTF16.valueOf(getMirror(codepoint));
-        case UProperty.CASE_FOLDING: return foldCase(UTF16.valueOf(codepoint), true);
-        case UProperty.LOWERCASE_MAPPING: return toLowerCase(UTF16.valueOf(codepoint));
+        case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
+        case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
+        case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
          case UProperty.NAME: return getName(codepoint);
-        case UProperty.SIMPLE_CASE_FOLDING: return UTF16.valueOf(foldCase(codepoint,true));
-        case UProperty.SIMPLE_LOWERCASE_MAPPING: return UTF16.valueOf(toLowerCase(codepoint));
-        case UProperty.SIMPLE_TITLECASE_MAPPING: return UTF16.valueOf(toTitleCase(codepoint));
-        case UProperty.SIMPLE_UPPERCASE_MAPPING: return UTF16.valueOf(toUpperCase(codepoint));
-        case UProperty.TITLECASE_MAPPING: return toTitleCase(UTF16.valueOf(codepoint),null);
+        case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
+        case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
+        case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
+        case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
+        case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
          case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
-        case UProperty.UPPERCASE_MAPPING: return toUpperCase(UTF16.valueOf(codepoint));
+        case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
          }
          throw new IllegalArgumentException("Illegal Property Enum");
      }
@@ -5888,67 +5874,67 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      // JDK 1.5 API coverage
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#LEAD_SURROGATE_MIN_VALUE
+     * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
+     *
       * @stable ICU 3.0
       */
-    public static final char MIN_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MIN_VALUE;
+    public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#LEAD_SURROGATE_MAX_VALUE
+     * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
+     *
       * @stable ICU 3.0
       */
-    public static final char MAX_HIGH_SURROGATE = UTF16.LEAD_SURROGATE_MAX_VALUE;
+    public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#TRAIL_SURROGATE_MIN_VALUE
+     * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
+     *
       * @stable ICU 3.0
       */
-    public static final char MIN_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MIN_VALUE;
+    public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#TRAIL_SURROGATE_MAX_VALUE
+     * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
+     *
       * @stable ICU 3.0
       */
-    public static final char MAX_LOW_SURROGATE = UTF16.TRAIL_SURROGATE_MAX_VALUE;
+    public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#SURROGATE_MIN_VALUE
+     * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
+     *
       * @stable ICU 3.0
       */
-    public static final char MIN_SURROGATE = UTF16.SURROGATE_MIN_VALUE;
+    public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#SURROGATE_MAX_VALUE
+     * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
+     *
       * @stable ICU 3.0
       */
-    public static final char MAX_SURROGATE = UTF16.SURROGATE_MAX_VALUE;
+    public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#SUPPLEMENTARY_MIN_VALUE
+     * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
+     *
       * @stable ICU 3.0
       */
-    public static final int  MIN_SUPPLEMENTARY_CODE_POINT = UTF16.SUPPLEMENTARY_MIN_VALUE;
+    public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#CODEPOINT_MAX_VALUE
+     * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
+     *
       * @stable ICU 3.0
       */
-    public static final int  MAX_CODE_POINT = UTF16.CODEPOINT_MAX_VALUE;
+    public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
-     * @see UTF16#CODEPOINT_MIN_VALUE
+     * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
+     *
       * @stable ICU 3.0
       */
-    public static final int  MIN_CODE_POINT = UTF16.CODEPOINT_MIN_VALUE;
+    public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
  
      /**
       * Cover the JDK 1.5 API, for convenience.
@@ -5961,77 +5947,82 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
+     * Same as {@link Character#isSupplementaryCodePoint}.
+     *
       * @param cp the code point to check
       * @return true if cp is a supplementary code point
       * @stable ICU 3.0
       */
      public static final boolean isSupplementaryCodePoint(int cp) {
-        return cp >= UTF16.SUPPLEMENTARY_MIN_VALUE
-                && cp <= UTF16.CODEPOINT_MAX_VALUE;
+        return Character.isSupplementaryCodePoint(cp);
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
+     * Same as {@link Character#isHighSurrogate}.
+     *
       * @param ch the char to check
       * @return true if ch is a high (lead) surrogate
       * @stable ICU 3.0
       */
      public static boolean isHighSurrogate(char ch) {
-        return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
+        return Character.isHighSurrogate(ch);
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.
+     * Same as {@link Character#isLowSurrogate}.
+     *
       * @param ch the char to check
       * @return true if ch is a low (trail) surrogate
       * @stable ICU 3.0
       */
      public static boolean isLowSurrogate(char ch) {
-        return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
+        return Character.isLowSurrogate(ch);
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return true if the chars
-     * form a valid surrogate pair.
+     * Same as {@link Character#isSurrogatePair}.
+     *
       * @param high the high (lead) char
       * @param low the low (trail) char
       * @return true if high, low form a surrogate pair
       * @stable ICU 3.0
       */
      public static final boolean isSurrogatePair(char high, char low) {
-        return isHighSurrogate(high) && isLowSurrogate(low);
+        return Character.isSurrogatePair(high, low);
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the number of chars needed
-     * to represent the code point.  This does not check the
-     * code point for validity.
+     * Same as {@link Character#charCount}.
+     * Returns the number of chars needed to represent the code point (1 or 2).
+     * This does not check the code point for validity.
+     *
       * @param cp the code point to check
       * @return the number of chars needed to represent the code point
-     * @see UTF16#getCharCount
       * @stable ICU 3.0
       */
      public static int charCount(int cp) {
-        return UTF16.getCharCount(cp);
+        return Character.charCount(cp);
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the code point represented by
-     * the characters.  This does not check the surrogate pair for validity.
+     * Same as {@link Character#toCodePoint}.
+     * Returns the code point represented by the two surrogate code units.
+     * This does not check the surrogate pair for validity.
+     *
       * @param high the high (lead) surrogate
       * @param low the low (trail) surrogate
       * @return the code point formed by the surrogate pair
       * @stable ICU 3.0
       */
      public static final int toCodePoint(char high, char low) {
-        return UCharacterProperty.getRawSupplementary(high, low);
+        return Character.toCodePoint(high, low);
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
-     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
-     * API.  This examines only the characters at index and index+1.
+     * Same as {@link Character#codePointAt(CharSequence, int)}.
+     * Returns the code point at index.
+     * This examines only the characters at index and index+1.
+     *
       * @param seq the characters to check
       * @param index the index of the first or only char forming the code point
       * @return the code point at the index
@@ -6051,9 +6042,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
-     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
-     * API.  This examines only the characters at index and index+1.
+     * Same as {@link Character#codePointAt(char[], int)}.
+     * Returns the code point at index.
+     * This examines only the characters at index and index+1.
+     *
       * @param text the characters to check
       * @param index the index of the first or only char forming the code point
       * @return the code point at the index
@@ -6073,9 +6065,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the code point at index.
-     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
-     * API.  This examines only the characters at index and index+1.
+     * Same as {@link Character#codePointAt(char[], int, int)}.
+     * Returns the code point at index.
+     * This examines only the characters at index and index+1.
+     *
       * @param text the characters to check
       * @param index the index of the first or only char forming the code point
       * @param limit the limit of the valid text
@@ -6099,9 +6092,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
-     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
-     * API.  This examines only the characters at index-1 and index-2.
+     * Same as {@link Character#codePointBefore(CharSequence, int)}.
+     * Returns the code point before index.
+     * This examines only the characters at index-1 and index-2.
+     *
       * @param seq the characters to check
       * @param index the index after the last or only char forming the code point
       * @return the code point before the index
@@ -6121,9 +6115,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
-     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
-     * API.  This examines only the characters at index-1 and index-2.
+     * Same as {@link Character#codePointBefore(char[], int)}.
+     * Returns the code point before index.
+     * This examines only the characters at index-1 and index-2.
+     *
       * @param text the characters to check
       * @param index the index after the last or only char forming the code point
       * @return the code point before the index
@@ -6143,9 +6138,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Return the code point before index.
-     * <br/><b>Note</b>: the semantics of this API is different from the related UTF16
-     * API.  This examines only the characters at index-1 and index-2.
+     * Same as {@link Character#codePointBefore(char[], int, int)}.
+     * Returns the code point before index.
+     * This examines only the characters at index-1 and index-2.
+     *
       * @param text the characters to check
       * @param index the index after the last or only char forming the code point
       * @param limit the start of the valid text
@@ -6169,8 +6165,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Writes the chars representing the
+     * Same as {@link Character#toChars(int, char[], int)}.
+     * Writes the chars representing the
       * code point into the destination at the given index.
+     *
       * @param cp the code point to convert
       * @param dst the destination array into which to put the char(s) representing the code point
       * @param dstIndex the index at which to put the first (or only) char
@@ -6179,41 +6177,20 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
       * @stable ICU 3.0
       */
      public static final int toChars(int cp, char[] dst, int dstIndex) {
-        if (cp >= 0) {
-            if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
-                dst[dstIndex] = (char)cp;
-                return 1;
-            }
-            if (cp <= MAX_CODE_POINT) {
-                dst[dstIndex] = UTF16.getLeadSurrogate(cp);
-                dst[dstIndex+1] = UTF16.getTrailSurrogate(cp);
-                return 2;
-            }
-        }
-        throw new IllegalArgumentException();
+        return Character.toChars(cp, dst, dstIndex);
      }
  
      /**
-     * Cover the JDK 1.5 API, for convenience.  Returns a char array
-     * representing the code point.
+     * Same as {@link Character#toChars(int)}.
+     * Returns a char array representing the code point.
+     *
       * @param cp the code point to convert
       * @return an array containing the char(s) representing the code point
       * @throws IllegalArgumentException if cp is not a valid code point
       * @stable ICU 3.0
       */
      public static final char[] toChars(int cp) {
-        if (cp >= 0) {
-            if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
-                return new char[] { (char)cp };
-            }
-            if (cp <= MAX_CODE_POINT) {
-                return new char[] {
-                        UTF16.getLeadSurrogate(cp),
-                        UTF16.getTrailSurrogate(cp)
-                };
-            }
-        }
-        throw new IllegalArgumentException();
+        return Character.toChars(cp);
      }
  
      /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java

index 3421b44c88ab73ed4ec58c399fddbe2f37e25425..c3b46406c67f1d83b7cf5f5c7cdff9243f81fa9a 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java
@@ -1,7 +1,7 @@
  /*
   *******************************************************************************
- * Copyright (C) 1996-2014, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
   *******************************************************************************
   */
  package com.ibm.icu.text;
@@ -159,9 +159,7 @@ public abstract class UCharacterIterator
              if(UTF16.isTrailSurrogate((char)ch2)){
                  // we found a surrogate pair 
                  // return the codepoint
-                return UCharacterProperty.getRawSupplementary(
-                                                          (char)ch,(char)ch2
-                                                             );
+                return Character.toCodePoint((char)ch, (char)ch2);
              }
          }
          return ch;
@@ -210,8 +208,7 @@ public abstract class UCharacterIterator
          if(UTF16.isLeadSurrogate((char)ch1)){
              int ch2 = next();
              if(UTF16.isTrailSurrogate((char)ch2)){
-                return UCharacterProperty.getRawSupplementary((char)ch1,
-                                                              (char)ch2);
+                return Character.toCodePoint((char)ch1, (char)ch2);
              }else if (ch2 != DONE) {
                  // unmatched surrogate so back out
                  previous();
@@ -248,8 +245,7 @@ public abstract class UCharacterIterator
          if(UTF16.isTrailSurrogate((char)ch1)){
              int ch2 = previous();
              if(UTF16.isLeadSurrogate((char)ch2)){
-                return UCharacterProperty.getRawSupplementary((char)ch2,
-                                                              (char)ch1);
+                return Character.toCodePoint((char)ch2, (char)ch1);
              }else if (ch2 != DONE) {
                  //unmatched trail surrogate so back out
                  next();
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java

index b3dc2fbbaed1700b0081bd3ad4f8b92a9c4799c7..21078a5aa1105c25f640f63a9ab7f3d440469a71 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java
@@ -1,14 +1,12 @@
  /**
   *******************************************************************************
- * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
   * others. All Rights Reserved.
   *******************************************************************************
   */
  
  package com.ibm.icu.text;
  
-import com.ibm.icu.impl.UCharacterProperty;
-
  /**
   * <p>
   * Standalone utility class providing UTF16 character conversions and indexing conversions.
@@ -237,7 +235,7 @@ public final class UTF16 {
              if (source.length() != offset16) {
                  char trail = source.charAt(offset16);
                  if (trail >= TRAIL_SURROGATE_MIN_VALUE && trail <= TRAIL_SURROGATE_MAX_VALUE) {
-                    return UCharacterProperty.getRawSupplementary(single, trail);
+                    return Character.toCodePoint(single, trail);
                  }
              }
          } else {
@@ -246,7 +244,7 @@ public final class UTF16 {
                  // single is a trail surrogate so
                  char lead = source.charAt(offset16);
                  if (lead >= LEAD_SURROGATE_MIN_VALUE && lead <= LEAD_SURROGATE_MAX_VALUE) {
-                    return UCharacterProperty.getRawSupplementary(lead, single);
+                    return Character.toCodePoint(lead, single);
                  }
              }
          }
@@ -292,7 +290,7 @@ public final class UTF16 {
                  char trail = source.charAt(offset16);
                  if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE
                          && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
-                    return UCharacterProperty.getRawSupplementary(single, trail);
+                    return Character.toCodePoint(single, trail);
                  }
              }
          } else {
@@ -302,7 +300,7 @@ public final class UTF16 {
                  char lead = source.charAt(offset16);
                  if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE
                          && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
-                    return UCharacterProperty.getRawSupplementary(lead, single);
+                    return Character.toCodePoint(lead, single);
                  }
              }
          }
@@ -344,7 +342,7 @@ public final class UTF16 {
              if (source.length() != offset16) {
                  char trail = source.charAt(offset16);
                  if (isTrailSurrogate(trail))
-                    return UCharacterProperty.getRawSupplementary(single, trail);
+                    return Character.toCodePoint(single, trail);
              }
          } else {
              --offset16;
@@ -352,7 +350,7 @@ public final class UTF16 {
                  // single is a trail surrogate so
                  char lead = source.charAt(offset16);
                  if (isLeadSurrogate(lead)) {
-                    return UCharacterProperty.getRawSupplementary(lead, single);
+                    return Character.toCodePoint(lead, single);
                  }
              }
          }
@@ -398,7 +396,7 @@ public final class UTF16 {
              }
              char trail = source[offset16];
              if (isTrailSurrogate(trail)) {
-                return UCharacterProperty.getRawSupplementary(single, trail);
+                return Character.toCodePoint(single, trail);
              }
          } else { // isTrailSurrogate(single), so
              if (offset16 == start) {
@@ -407,7 +405,7 @@ public final class UTF16 {
              offset16--;
              char lead = source[offset16];
              if (isLeadSurrogate(lead))
-                return UCharacterProperty.getRawSupplementary(lead, single);
+                return Character.toCodePoint(lead, single);
          }
          return single; // return unmatched surrogate
      }
@@ -447,7 +445,7 @@ public final class UTF16 {
              if (source.length() != offset16) {
                  char trail = source.charAt(offset16);
                  if (isTrailSurrogate(trail))
-                    return UCharacterProperty.getRawSupplementary(single, trail);
+                    return Character.toCodePoint(single, trail);
              }
          } else {
              --offset16;
@@ -455,7 +453,7 @@ public final class UTF16 {
                  // single is a trail surrogate so
                  char lead = source.charAt(offset16);
                  if (isLeadSurrogate(lead)) {
-                    return UCharacterProperty.getRawSupplementary(lead, single);
+                    return Character.toCodePoint(lead, single);
                  }
              }
          }
author	Markus Scherer <markus.icu@gmail.com>
	Wed, 9 Sep 2015 00:13:06 +0000 (00:13 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Wed, 9 Sep 2015 00:13:06 +0000 (00:13 +0000)
icu4j/main/classes/core/src/com/ibm/icu/impl/BMPSet.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/ReplaceableUCharacterIterator.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/TrieIterator.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/UnicodeSetStringSpan.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/Utility.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/UCharacterIterator.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/UTF16.java		patch \| blob \| history