ICU-8575 option for not including utf headers by default; replace uses of deprecated...

author Markus Scherer <markus.icu@gmail.com>

Wed, 27 Jul 2011 05:53:56 +0000 (05:53 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Wed, 27 Jul 2011 05:53:56 +0000 (05:53 +0000)
author Markus Scherer <markus.icu@gmail.com>
Wed, 27 Jul 2011 05:53:56 +0000 (05:53 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Wed, 27 Jul 2011 05:53:56 +0000 (05:53 +0000)
diff --git a/icu4c/readme.html b/icu4c/readme.html

index 56c27baa1d6e7722412def11d8567f06474416fe..982cea9722a54f265c3c008722fe49df036400dd 100644 (file)
--- a/icu4c/readme.html
+++ b/icu4c/readme.html
@@ -594,7 +594,8 @@
          This can lead to inadvertent construction of a <code>UnicodeString</code>
          with a single character by using an integer,
          and it can lead to inadvertent dependency on the conversion framework
-        by using a C string literal.
+        by using a C string literal.<br>
+        Beginning with ICU 49, you should do the following:
          <ul>
            <li>Consider marking the from-<code>UChar</code>
              and from-<code>UChar32</code> constructors explicit via
@@ -602,7 +603,18 @@
            <li>Consider marking the from-<code>const char*</code> and
              from-<code>const UChar*</code></li> constructors explicit via
              <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> or similar.</li>
+        Note: The ICU test suites cannot be compiled with these settings.
        </li>
+      <li><b>utf.h, utf8.h, utf16.h, utf_old.h:</b>
+        By default, utypes.h (and thus almost every public ICU header)
+        includes all of these header files.
+        Often, none of them are needed, or only one or two of them.
+        All of utf_old.h is deprecated or obsolete.<br>
+        Beginning with ICU 49,
+        you should define <code>U_NO_DEFAULT_INCLUDE_UTF_HEADERS</code> to 1
+        (via -D or uconfig.h, as above)
+        and include those header files explicitly that you actually need.<br>
+        Note: The ICU test suites cannot be compiled with this setting.</li>
        <li><b>.dat file:</b> By default, the ICU data is built into
          a shared library (DLL). This is convenient because it requires no
          install-time or runtime configuration,
diff --git a/icu4c/source/common/appendable.cpp b/icu4c/source/common/appendable.cpp

index 1fd4c38d1d58517be280c24e59de6f864ab05797..4d672fc051540d596b7226063f9a8621a73792be 100644 (file)
--- a/icu4c/source/common/appendable.cpp
+++ b/icu4c/source/common/appendable.cpp
@@ -14,6 +14,7 @@
  
  #include "unicode/utypes.h"
  #include "unicode/appendable.h"
+#include "unicode/utf16.h"
  
  U_NAMESPACE_BEGIN
  
diff --git a/icu4c/source/common/bmpset.cpp b/icu4c/source/common/bmpset.cpp

index db87c705667b4ae3bfba26bc36ccb2aabedb48ed..e3f98cbeb73871c3815389a5e0067606c1b31b92 100644 (file)
--- a/icu4c/source/common/bmpset.cpp
+++ b/icu4c/source/common/bmpset.cpp
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2007-2008, International Business Machines
+*   Copyright (C) 2007-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -16,6 +16,8 @@
  
  #include "unicode/utypes.h"
  #include "unicode/uniset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "bmpset.h"
  
diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp

index 1eaf6d20f214e20d7f5690f37ad33fcd4028599f..37ca8dfb50e67b51ed7f3005a98ef2d7e7b6bf1d 100644 (file)
--- a/icu4c/source/common/caniter.cpp
+++ b/icu4c/source/common/caniter.cpp
@@ -15,6 +15,7 @@
  #include "unicode/uniset.h"
  #include "unicode/usetiter.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "hash.h"
  #include "normalizer2impl.h"
@@ -207,14 +208,14 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
  
      // i should initialy be the number of code units at the 
      // start of the string
-    i = UTF16_CHAR_LENGTH(source.char32At(0));
+    i = U16_LENGTH(source.char32At(0));
      //int32_t i = 1;
      // find the segments
      // This code iterates through the source string and 
      // extracts segments that end up on a codepoint that
      // doesn't start any decompositions. (Analysis is done
      // on the NFD form - see above).
-    for (; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
+    for (; i < source.length(); i += U16_LENGTH(cp)) {
          cp = source.char32At(i);
          if (nfcImpl.isCanonSegmentStarter(cp)) {
              source.extract(start, i-start, list[list_length++]); // add up to i
@@ -290,7 +291,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
      }
      subpermute.setValueDeleter(uprv_deleteUObject);
  
-    for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
+    for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
          cp = source.char32At(i);
          const UHashElement *ne = NULL;
          int32_t el = -1;
@@ -308,7 +309,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
  
          // see what the permutations of the characters before and after this one are
          //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
-        permute(subPermuteString.replace(i, UTF16_CHAR_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
+        permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
          /* Test for buffer overflows */
          if(U_FAILURE(status)) {
              return;
@@ -442,9 +443,9 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
  
      // cycle through all the characters
      UChar32 cp;
-    for (int32_t i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) {
+    for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
          // see if any character is at the start of some decomposition
-        UTF_GET_CHAR(segment, 0, i, segLen, cp);
+        U16_GET(segment, 0, i, segLen, cp);
          if (!nfcImpl.getCanonStartSet(cp, starts)) {
              continue;
          }
diff --git a/icu4c/source/common/messagepattern.cpp b/icu4c/source/common/messagepattern.cpp

index 0f09f277279503fb1d4f54aa07bffb95d5dbda25..af1d6e41d2eed8ca1a4fdd311043c7cc5e950eeb 100644 (file)
--- a/icu4c/source/common/messagepattern.cpp
+++ b/icu4c/source/common/messagepattern.cpp
@@ -18,6 +18,7 @@
  
  #include "unicode/messagepattern.h"
  #include "unicode/unistr.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "messageimpl.h"
diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp

index 87b34408e662b021482b742ff8397fdc1eb80c12..f33589d4e62debddb2c41a207d6f735a9bbbb150 100644 (file)
--- a/icu4c/source/common/normalizer2impl.cpp
+++ b/icu4c/source/common/normalizer2impl.cpp
@@ -21,6 +21,7 @@
  #include "unicode/normalizer2.h"
  #include "unicode/udata.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "mutex.h"
  #include "normalizer2impl.h"
diff --git a/icu4c/source/common/normalizer2impl.h b/icu4c/source/common/normalizer2impl.h

index 4ff2386ee83ece04f851b8bca84a5de21fa99f3a..304fc1b747a1a899b30aebec567bebc388b63748 100644 (file)
--- a/icu4c/source/common/normalizer2impl.h
+++ b/icu4c/source/common/normalizer2impl.h
@@ -25,6 +25,7 @@
  #include "unicode/udata.h"
  #include "unicode/unistr.h"
  #include "unicode/unorm.h"
+#include "unicode/utf16.h"
  #include "mutex.h"
  #include "uset_imp.h"
  #include "utrie2.h"
diff --git a/icu4c/source/common/normlzr.cpp b/icu4c/source/common/normlzr.cpp

index 0e2b6b634992961f1f50d47b0a90cf229bbbe9fa..b9580537b232ae53909a452c5d31a658314058a9 100644 (file)
--- a/icu4c/source/common/normlzr.cpp
+++ b/icu4c/source/common/normlzr.cpp
@@ -16,6 +16,7 @@
  #include "unicode/schriter.h"
  #include "unicode/uchriter.h"
  #include "unicode/normlzr.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "normalizer2impl.h"
  #include "uprops.h"  // for uniset_getUnicode32Instance()
@@ -262,7 +263,7 @@ UChar32 Normalizer::current() {
  UChar32 Normalizer::next() {
      if(bufferPos<buffer.length() ||  nextNormalize()) {
          UChar32 c=buffer.char32At(bufferPos);
-        bufferPos+=UTF_CHAR_LENGTH(c);
+        bufferPos+=U16_LENGTH(c);
          return c;
      } else {
          return DONE;
@@ -277,7 +278,7 @@ UChar32 Normalizer::next() {
  UChar32 Normalizer::previous() {
      if(bufferPos>0 || previousNormalize()) {
          UChar32 c=buffer.char32At(bufferPos-1);
-        bufferPos-=UTF_CHAR_LENGTH(c);
+        bufferPos-=U16_LENGTH(c);
          return c;
      } else {
          return DONE;
diff --git a/icu4c/source/common/punycode.cpp b/icu4c/source/common/punycode.cpp

index e3c5f7eff4771db52b46a0647eb04adb67d8f1c4..825990324b1201fa668439aec900683f5bf3b633 100644 (file)
--- a/icu4c/source/common/punycode.cpp
+++ b/icu4c/source/common/punycode.cpp
@@ -46,11 +46,13 @@ Disclaimer and license
  
  #if !UCONFIG_NO_IDNA
  
+#include "unicode/ustring.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "ustr_imp.h"
  #include "cstring.h"
  #include "cmemory.h"
  #include "punycode.h"
-#include "unicode/ustring.h"
  
  
  /* Punycode ----------------------------------------------------------------- */
@@ -222,11 +224,11 @@ u_strToPunycode(const UChar *src, int32_t srcLength,
                  ++destLength;
              } else {
                  n=(caseFlags!=NULL && caseFlags[j])<<31L;
-                if(UTF_IS_SINGLE(c)) {
+                if(U16_IS_SINGLE(c)) {
                      n|=c;
-                } else if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(c2=src[j+1])) {
+                } else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) {
                      ++j;
-                    n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
+                    n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
                  } else {
                      /* error: unmatched surrogate */
                      *pErrorCode=U_INVALID_CHAR_FOUND;
@@ -255,11 +257,11 @@ u_strToPunycode(const UChar *src, int32_t srcLength,
                  ++destLength;
              } else {
                  n=(caseFlags!=NULL && caseFlags[j])<<31L;
-                if(UTF_IS_SINGLE(c)) {
+                if(U16_IS_SINGLE(c)) {
                      n|=c;
-                } else if(UTF_IS_LEAD(c) && (j+1)<srcLength && UTF_IS_TRAIL(c2=src[j+1])) {
+                } else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) {
                      ++j;
-                    n|=(int32_t)UTF16_GET_PAIR_VALUE(c, c2);
+                    n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
                  } else {
                      /* error: unmatched surrogate */
                      *pErrorCode=U_INVALID_CHAR_FOUND;
@@ -510,14 +512,14 @@ u_strFromPunycode(const UChar *src, int32_t srcLength,
          /* not needed for Punycode: */
          /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
  
-        if(n>0x10ffff || UTF_IS_SURROGATE(n)) {
+        if(n>0x10ffff || U_IS_SURROGATE(n)) {
              /* Unicode code point overflow */
              *pErrorCode=U_ILLEGAL_CHAR_FOUND;
              return 0;
          }
  
          /* Insert n at position i of the output: */
-        cpLength=UTF_CHAR_LENGTH(n);
+        cpLength=U16_LENGTH(n);
          if((destLength+cpLength)<=destCapacity) {
              int32_t codeUnitIndex;
  
@@ -540,7 +542,7 @@ u_strFromPunycode(const UChar *src, int32_t srcLength,
                  }
              } else {
                  codeUnitIndex=firstSupplementaryIndex;
-                UTF_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
+                U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
              }
  
              /* use the UChar index codeUnitIndex instead of the code point index i */
@@ -559,8 +561,8 @@ u_strFromPunycode(const UChar *src, int32_t srcLength,
                  dest[codeUnitIndex]=(UChar)n;
              } else {
                  /* supplementary character, insert two code units */
-                dest[codeUnitIndex]=UTF16_LEAD(n);
-                dest[codeUnitIndex+1]=UTF16_TRAIL(n);
+                dest[codeUnitIndex]=U16_LEAD(n);
+                dest[codeUnitIndex+1]=U16_TRAIL(n);
              }
              if(caseFlags!=NULL) {
                  /* Case of last character determines uppercase flag: */
diff --git a/icu4c/source/common/ruleiter.cpp b/icu4c/source/common/ruleiter.cpp

index 0c83671ffaad6aae2418c68570de8d00d04b5397..667795efabcf39632c79a8c2480a8656099bce02 100644 (file)
--- a/icu4c/source/common/ruleiter.cpp
+++ b/icu4c/source/common/ruleiter.cpp
@@ -10,8 +10,9 @@
  */
  #include "ruleiter.h"
  #include "unicode/parsepos.h"
-#include "unicode/unistr.h"
  #include "unicode/symtable.h"
+#include "unicode/unistr.h"
+#include "unicode/utf16.h"
  #include "patternprops.h"
  
  /* \U87654321 or \ud800\udc00 */
@@ -40,7 +41,7 @@ UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCod
  
      for (;;) {
          c = _current();
-        _advance(UTF_CHAR_LENGTH(c));
+        _advance(U16_LENGTH(c));
  
          if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
              (options & PARSE_VARIABLES) != 0 && sym != 0) {
@@ -102,7 +103,7 @@ void RuleCharacterIterator::skipIgnored(int32_t options) {
          for (;;) {
              UChar32 a = _current();
              if (!PatternProps::isWhiteSpace(a)) break;
-            _advance(UTF_CHAR_LENGTH(a));
+            _advance(U16_LENGTH(a));
          }
      }
  }
diff --git a/icu4c/source/common/ubidi.c b/icu4c/source/common/ubidi.c

index 60953ad9e03f398ba966dbb62959814838b560e1..cb2dee9bf26e8ccde64fd0fba37806173e6ddd2f 100644 (file)
--- a/icu4c/source/common/ubidi.c
+++ b/icu4c/source/common/ubidi.c
@@ -19,6 +19,7 @@
  #include "unicode/ustring.h"
  #include "unicode/uchar.h"
  #include "unicode/ubidi.h"
+#include "unicode/utf16.h"
  #include "ubidi_props.h"
  #include "ubidiimp.h"
  #include "uassert.h"
diff --git a/icu4c/source/common/ubidiwrt.c b/icu4c/source/common/ubidiwrt.c

index 34b13711d2c2f13e5a2ed5657164769204aa97cf..3f1053e39f9ead0d56a3b09fe8fa6e13bd5ae1d9 100644 (file)
--- a/icu4c/source/common/ubidiwrt.c
+++ b/icu4c/source/common/ubidiwrt.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2000-2007, International Business Machines
+*   Copyright (C) 2000-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -26,6 +26,7 @@
  #include "unicode/ustring.h"
  #include "unicode/uchar.h"
  #include "unicode/ubidi.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "ustr_imp.h"
  #include "ubidiimp.h"
@@ -86,9 +87,9 @@ doWriteForward(const UChar *src, int32_t srcLength,
              return srcLength;
          }
          do {
-            UTF_NEXT_CHAR(src, i, srcLength, c);
+            U16_NEXT(src, i, srcLength, c);
              c=u_charMirror(c);
-            UTF_APPEND_CHAR_UNSAFE(dest, j, c);
+            U16_APPEND_UNSAFE(dest, j, c);
          } while(i<srcLength);
          return srcLength;
      }
@@ -123,7 +124,7 @@ doWriteForward(const UChar *src, int32_t srcLength,
          UChar32 c;
          do {
              i=0;
-            UTF_NEXT_CHAR(src, i, srcLength, c);
+            U16_NEXT(src, i, srcLength, c);
              src+=i;
              srcLength-=i;
              if(!IS_BIDI_CONTROL_CHAR(c)) {
@@ -142,7 +143,7 @@ doWriteForward(const UChar *src, int32_t srcLength,
                      return destSize-remaining;
                  }
                  c=u_charMirror(c);
-                UTF_APPEND_CHAR_UNSAFE(dest, j, c);
+                U16_APPEND_UNSAFE(dest, j, c);
              }
          } while(srcLength>0);
          return j;
@@ -197,7 +198,7 @@ doWriteReverse(const UChar *src, int32_t srcLength,
              i=srcLength;
  
              /* collect code units for one base character */
-            UTF_BACK_1(src, 0, srcLength);
+            U16_BACK_1(src, 0, srcLength);
  
              /* copy this base character */
              j=srcLength;
@@ -226,7 +227,7 @@ doWriteReverse(const UChar *src, int32_t srcLength,
  
              /* collect code units and modifier letters for one base character */
              do {
-                UTF_PREV_CHAR(src, 0, srcLength, c);
+                U16_PREV(src, 0, srcLength, c);
              } while(srcLength>0 && IS_COMBINING(u_charType(c)));
  
              /* copy this "user character" */
@@ -274,11 +275,11 @@ doWriteReverse(const UChar *src, int32_t srcLength,
              i=srcLength;
  
              /* collect code units for one base character */
-            UTF_PREV_CHAR(src, 0, srcLength, c);
+            U16_PREV(src, 0, srcLength, c);
              if(options&UBIDI_KEEP_BASE_COMBINING) {
                  /* collect modifier letters for this base character */
                  while(srcLength>0 && IS_COMBINING(u_charType(c))) {
-                    UTF_PREV_CHAR(src, 0, srcLength, c);
+                    U16_PREV(src, 0, srcLength, c);
                  }
              }
  
@@ -293,7 +294,7 @@ doWriteReverse(const UChar *src, int32_t srcLength,
                  /* mirror only the base character */
                  int32_t k=0;
                  c=u_charMirror(c);
-                UTF_APPEND_CHAR_UNSAFE(dest, k, c);
+                U16_APPEND_UNSAFE(dest, k, c);
                  dest+=k;
                  j+=k;
              }
diff --git a/icu4c/source/common/ucase.cpp b/icu4c/source/common/ucase.cpp

index c6ea6f57c643d035def30d60634adfc52e3eeab7..ebac5153e7b23b1ffa6ef63d5920e21fe6a7286b 100644 (file)
--- a/icu4c/source/common/ucase.cpp
+++ b/icu4c/source/common/ucase.cpp
@@ -20,6 +20,7 @@
  #include "unicode/utypes.h"
  #include "unicode/uset.h"
  #include "unicode/udata.h" /* UDataInfo */
+#include "unicode/utf16.h"
  #include "ucmndata.h" /* DataHeader */
  #include "udatamem.h"
  #include "umutex.h"
diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp

index 0b52975d072a7f4673b54457514f89a99f20461d..c43cf165659d5cd4d1352f98fcf0e1da20b6796d 100644 (file)
--- a/icu4c/source/common/ucasemap.cpp
+++ b/icu4c/source/common/ucasemap.cpp
@@ -25,6 +25,9 @@
  #if !UCONFIG_NO_BREAK_ITERATION
  #include "unicode/utext.h"
  #endif
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "ucase.h"
diff --git a/icu4c/source/common/ucharstrie.cpp b/icu4c/source/common/ucharstrie.cpp

index 97301cc84d1235dbb96d832d34eec3b4ac9d7207..476271deacb8360102503d2e079a3bf104004e2f 100644 (file)
--- a/icu4c/source/common/ucharstrie.cpp
+++ b/icu4c/source/common/ucharstrie.cpp
@@ -16,6 +16,7 @@
  #include "unicode/appendable.h"
  #include "unicode/ucharstrie.h"
  #include "unicode/uobject.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "uassert.h"
  
@@ -37,6 +38,24 @@ UCharsTrie::current() const {
      }
  }
  
+UStringTrieResult  // Code-point variant of first(): feeds cp as one or two UTF-16 code units.
+UCharsTrie::firstForCodePoint(UChar32 cp) {
+    return cp<=0xffff ?  // fits in a single 16-bit code unit?
+        first(cp) :  // yes: pass it straight to first()
+        (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?  // no: first() gets the lead surrogate
+            next(U16_TRAIL(cp)) :  // lead result allows continuing: next() gets the trail surrogate
+            USTRINGTRIE_NO_MATCH);  // lead result does not allow continuing: report no match
+}
+
+UStringTrieResult  // Code-point variant of next(): feeds cp as one or two UTF-16 code units.
+UCharsTrie::nextForCodePoint(UChar32 cp) {
+    return cp<=0xffff ?  // fits in a single 16-bit code unit?
+        next(cp) :  // yes: pass it straight to next()
+        (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?  // no: next() gets the lead surrogate first
+            next(U16_TRAIL(cp)) :  // lead result allows continuing: next() gets the trail surrogate
+            USTRINGTRIE_NO_MATCH);  // lead result does not allow continuing: report no match
+}
+
  UStringTrieResult
  UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
      // Branch according to the current unit.
diff --git a/icu4c/source/common/uchriter.cpp b/icu4c/source/common/uchriter.cpp

index 6de1e8b8f57e839b84fa0bfe7acccc431ae56df2..2d59e167a1df3d69f27e43961d3c64b063cf0326 100644 (file)
--- a/icu4c/source/common/uchriter.cpp
+++ b/icu4c/source/common/uchriter.cpp
@@ -9,6 +9,7 @@
  
  #include "unicode/uchriter.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "ustr_imp.h"
  
  U_NAMESPACE_BEGIN
@@ -191,7 +192,7 @@ UCharCharacterIterator::first32() {
      if(pos < end) {
          int32_t i = pos;
          UChar32 c;
-        UTF_NEXT_CHAR(text, i, end, c);
+        U16_NEXT(text, i, end, c);
          return c;
      } else {
          return DONE;
@@ -203,7 +204,7 @@ UCharCharacterIterator::first32PostInc() {
      pos = begin;
      if(pos < end) {
          UChar32 c;
-        UTF_NEXT_CHAR(text, pos, end, c);
+        U16_NEXT(text, pos, end, c);
          return c;
      } else {
          return DONE;
@@ -215,7 +216,7 @@ UCharCharacterIterator::last32() {
      pos = end;
      if(pos > begin) {
          UChar32 c;
-        UTF_PREV_CHAR(text, begin, pos, c);
+        U16_PREV(text, begin, pos, c);
          return c;
      } else {
          return DONE;
@@ -230,10 +231,10 @@ UCharCharacterIterator::setIndex32(int32_t position) {
          position = end;
      }
      if(position < end) {
-        UTF_SET_CHAR_START(text, begin, position);
+        U16_SET_CP_START(text, begin, position);
          int32_t i = this->pos = position;
          UChar32 c;
-        UTF_NEXT_CHAR(text, i, end, c);
+        U16_NEXT(text, i, end, c);
          return c;
      } else {
          this->pos = position;
@@ -245,7 +246,7 @@ UChar32
  UCharCharacterIterator::current32() const {
      if (pos >= begin && pos < end) {
          UChar32 c;
-        UTF_GET_CHAR(text, begin, pos, end, c);
+        U16_GET(text, begin, pos, end, c);
          return c;
      } else {
          return DONE;
@@ -255,11 +256,11 @@ UCharCharacterIterator::current32() const {
  UChar32
  UCharCharacterIterator::next32() {
      if (pos < end) {
-        UTF_FWD_1(text, pos, end);
+        U16_FWD_1(text, pos, end);
          if(pos < end) {
              int32_t i = pos;
              UChar32 c;
-            UTF_NEXT_CHAR(text, i, end, c);
+            U16_NEXT(text, i, end, c);
              return c;
          }
      }
@@ -272,7 +273,7 @@ UChar32
  UCharCharacterIterator::next32PostInc() {
      if (pos < end) {
          UChar32 c;
-        UTF_NEXT_CHAR(text, pos, end, c);
+        U16_NEXT(text, pos, end, c);
          return c;
      } else {
          return DONE;
@@ -283,7 +284,7 @@ UChar32
  UCharCharacterIterator::previous32() {
      if (pos > begin) {
          UChar32 c;
-        UTF_PREV_CHAR(text, begin, pos, c);
+        U16_PREV(text, begin, pos, c);
          return c;
      } else {
          return DONE;
@@ -323,20 +324,20 @@ UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin)
      case kStart:
          pos = begin;
          if(delta > 0) {
-            UTF_FWD_N(text, pos, end, delta);
+            U16_FWD_N(text, pos, end, delta);
          }
          break;
      case kCurrent:
          if(delta > 0) {
-            UTF_FWD_N(text, pos, end, delta);
+            U16_FWD_N(text, pos, end, delta);
          } else {
-            UTF_BACK_N(text, begin, pos, -delta);
+            U16_BACK_N(text, begin, pos, -delta);
          }
          break;
      case kEnd:
          pos = end;
          if(delta < 0) {
-            UTF_BACK_N(text, begin, pos, -delta);
+            U16_BACK_N(text, begin, pos, -delta);
          }
          break;
      default:
diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c

index 0979039c750e30c404e4d96c23cfda925c22cdca..3308c6c7bbdcc366e99cbb74de677bc019186224 100644 (file)
--- a/icu4c/source/common/ucnv.c
+++ b/icu4c/source/common/ucnv.c
@@ -27,6 +27,8 @@
  #include "unicode/ucnv.h"
  #include "unicode/ucnv_err.h"
  #include "unicode/uset.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "putilimp.h"
  #include "cmemory.h"
  #include "cstring.h"
diff --git a/icu4c/source/common/ucnv2022.cpp b/icu4c/source/common/ucnv2022.cpp

index 628804846ccea6747b4ff8c95410fa8cb917cc7c..404303658d3cd8eb6b68aa4ea5903d1a6e91f41c 100644 (file)
--- a/icu4c/source/common/ucnv2022.cpp
+++ b/icu4c/source/common/ucnv2022.cpp
@@ -34,6 +34,7 @@
  #include "unicode/uset.h"
  #include "unicode/ucnv_err.h"
  #include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
  #include "ucnv_imp.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
@@ -1645,16 +1646,16 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
  
              sourceChar  = *(source++);
              /*check if the char is a First surrogate*/
-            if(UTF_IS_SURROGATE(sourceChar)) {
-                if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
+            if(U16_IS_SURROGATE(sourceChar)) {
+                if(U16_IS_SURROGATE_LEAD(sourceChar)) {
  getTrail:
                      /*look ahead to find the trail surrogate*/
                      if(source < sourceLimit) {
                          /* test the following code unit */
                          UChar trail=(UChar) *source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              source++;
-                            sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                              cnv->fromUChar32=0x00;
                              /* convert this supplementary code point */
                              /* exit this condition tree */
@@ -2422,16 +2423,16 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
                   */
  
                  /*check if the char is a First surrogate*/
-                if(UTF_IS_SURROGATE(sourceChar)) {
-                    if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
+                if(U16_IS_SURROGATE(sourceChar)) {
+                    if(U16_IS_SURROGATE_LEAD(sourceChar)) {
  getTrail:
                          /*look ahead to find the trail surrogate*/
                          if(source <  sourceLimit) {
                              /* test the following code unit */
                              UChar trail=(UChar) *source;
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                  source++;
-                                sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+                                sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                                  *err = U_INVALID_CHAR_FOUND;
                                  /* convert this surrogate code point */
                                  /* exit this condition tree */
@@ -2894,16 +2895,16 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
  
              sourceChar  = *(source++);
              /*check if the char is a First surrogate*/
-             if(UTF_IS_SURROGATE(sourceChar)) {
-                if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
+             if(U16_IS_SURROGATE(sourceChar)) {
+                if(U16_IS_SURROGATE_LEAD(sourceChar)) {
  getTrail:
                      /*look ahead to find the trail surrogate*/
                      if(source < sourceLimit) {
                          /* test the following code unit */
                          UChar trail=(UChar) *source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              source++;
-                            sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                              cnv->fromUChar32=0x00;
                              /* convert this supplementary code point */
                              /* exit this condition tree */
diff --git a/icu4c/source/common/ucnv_bld.h b/icu4c/source/common/ucnv_bld.h

index d8e21774d39e09808c7b27a12c44118d6ab589e1..ce6f374f5ac959494d17b811e215df19cd63b391 100644 (file)
--- a/icu4c/source/common/ucnv_bld.h
+++ b/icu4c/source/common/ucnv_bld.h
@@ -23,6 +23,7 @@
  
  #include "unicode/ucnv.h"
  #include "unicode/ucnv_err.h"
+#include "unicode/utf16.h"
  #include "ucnv_cnv.h"
  #include "ucnvmbcs.h"
  #include "ucnv_ext.h"
diff --git a/icu4c/source/common/ucnv_ct.c b/icu4c/source/common/ucnv_ct.c

index 389a8bc023a93c8635c854db7565554ce091efd7..16f43d277543c461d6eb8c3e2c31807034c1a1a2 100644 (file)
--- a/icu4c/source/common/ucnv_ct.c
+++ b/icu4c/source/common/ucnv_ct.c
@@ -20,6 +20,7 @@
  #include "unicode/uset.h"
  #include "unicode/ucnv_err.h"
  #include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
  #include "ucnv_imp.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
@@ -356,16 +357,16 @@ UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UEr
  
              sourceChar  = *(source++);
              /*check if the char is a First surrogate*/
-             if(UTF_IS_SURROGATE(sourceChar)) {
-                if(UTF_IS_SURROGATE_FIRST(sourceChar)) {
+             if(U16_IS_SURROGATE(sourceChar)) {
+                if(U16_IS_SURROGATE_LEAD(sourceChar)) {
  getTrail:
                      /*look ahead to find the trail surrogate*/
                      if(source < sourceLimit) {
                          /* test the following code unit */
                          UChar trail=(UChar) *source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              source++;
-                            sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                              cnv->fromUChar32=0x00;
                              /* convert this supplementary code point */
                              /* exit this condition tree */
diff --git a/icu4c/source/common/ucnv_u32.c b/icu4c/source/common/ucnv_u32.c

index f76b9b04edab2b589baa128926f4b8fef365316f..c24aaeeed79f18e1c27d46f773a443e314d929ae 100644 (file)
--- a/icu4c/source/common/ucnv_u32.c
+++ b/icu4c/source/common/ucnv_u32.c
@@ -1,6 +1,6 @@
  /*  
  **********************************************************************
-*   Copyright (C) 2002-2009, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnv_u32.c
@@ -19,6 +19,7 @@
  #if !UCONFIG_NO_CONVERSION
  
  #include "unicode/ucnv.h"
+#include "unicode/utf.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
  #include "cmemory.h"
@@ -246,7 +247,7 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
      while (mySource < sourceLimit && myTarget < targetLimit) {
          ch = *(mySource++);
  
-        if (UTF_IS_SURROGATE(ch)) {
+        if (U_IS_SURROGATE(ch)) {
              if (U_IS_LEAD(ch)) {
  lowsurogate:
                  if (mySource < sourceLimit) {
@@ -350,7 +351,7 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
      while (mySource < sourceLimit && myTarget < targetLimit) {
          ch = *(mySource++);
  
-        if (UTF_IS_SURROGATE(ch)) {
+        if (U_IS_SURROGATE(ch)) {
              if (U_IS_LEAD(ch)) {
  lowsurogate:
                  if (mySource < sourceLimit) {
@@ -726,14 +727,14 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
      {
          ch = *(mySource++);
  
-        if (UTF_IS_SURROGATE(ch)) {
-            if (U_IS_LEAD(ch))
+        if (U16_IS_SURROGATE(ch)) {
+            if (U16_IS_LEAD(ch))
              {
  lowsurogate:
                  if (mySource < sourceLimit)
                  {
                      ch2 = *mySource;
-                    if (U_IS_TRAIL(ch2)) {
+                    if (U16_IS_TRAIL(ch2)) {
                          ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
                          mySource++;
                      }
@@ -838,14 +839,14 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
      {
          ch = *(mySource++);
  
-        if (UTF_IS_SURROGATE(ch)) {
-            if (U_IS_LEAD(ch))
+        if (U16_IS_SURROGATE(ch)) {
+            if (U16_IS_LEAD(ch))
              {
  lowsurogate:
                  if (mySource < sourceLimit)
                  {
                      ch2 = *mySource;
-                    if (U_IS_TRAIL(ch2))
+                    if (U16_IS_TRAIL(ch2))
                      {
                          ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
                          mySource++;
diff --git a/icu4c/source/common/ucnv_u8.c b/icu4c/source/common/ucnv_u8.c

index 75f554c427bf0a02a7f5547ac05cb6848baed07f..6ff1f5fa22400fef074d572a1d3028d9bba11984 100644 (file)
--- a/icu4c/source/common/ucnv_u8.c
+++ b/icu4c/source/common/ucnv_u8.c
@@ -1,6 +1,6 @@
  /*  
  **********************************************************************
-*   Copyright (C) 2002-2007, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnv_u8.c
@@ -23,6 +23,9 @@
  #if !UCONFIG_NO_CONVERSION
  
  #include "unicode/ucnv.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
  #include "cmemory.h"
@@ -130,7 +133,7 @@ morebytes:
                  if (mySource < sourceLimit)
                  {
                      toUBytes[i] = (char) (ch2 = *mySource);
-                    if (!UTF8_IS_TRAIL(ch2))
+                    if (!U8_IS_TRAIL(ch2))
                      {
                          break; /* i < inBytes */
                      }
@@ -164,7 +167,7 @@ morebytes:
               * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
               */
              if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
-                (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
+                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
              {
                  /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                  if (ch <= MAXIMUM_UCS2) 
@@ -259,7 +262,7 @@ morebytes:
                  if (mySource < sourceLimit)
                  {
                      toUBytes[i] = (char) (ch2 = *mySource);
-                    if (!UTF8_IS_TRAIL(ch2))
+                    if (!U8_IS_TRAIL(ch2))
                      {
                          break; /* i < inBytes */
                      }
@@ -292,7 +295,7 @@ morebytes:
               * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
               */
              if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
-                (isCESU8 ? i <= 3 : !UTF_IS_SURROGATE(ch)))
+                (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
              {
                  /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
                  if (ch <= MAXIMUM_UCS2) 
@@ -387,13 +390,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
          }
          else {
              /* Check for surrogates */
-            if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
+            if(U16_IS_SURROGATE(ch) && isNotCESU8) {
  lowsurrogate:
                  if (mySource < sourceLimit) {
                      /* test both code units */
-                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
+                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
                          /* convert and consume this supplementary code point */
-                        ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
+                        ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
                          ++mySource;
                          /* exit this condition tree */
                      }
@@ -513,13 +516,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
          {
              nextSourceIndex = offsetNum + 1;
  
-            if(UTF_IS_SURROGATE(ch) && isNotCESU8) {
+            if(U16_IS_SURROGATE(ch) && isNotCESU8) {
  lowsurrogate:
                  if (mySource < sourceLimit) {
                      /* test both code units */
-                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_SECOND_SURROGATE(*mySource)) {
+                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
                          /* convert and consume this supplementary code point */
-                        ch=UTF16_GET_PAIR_VALUE(ch, *mySource);
+                        ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
                          ++mySource;
                          ++nextSourceIndex;
                          /* exit this condition tree */
@@ -662,7 +665,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
      case 6:
          ch += (myByte = *source);
          ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
          {
              isLegalSequence = 0;
              break;
@@ -671,7 +674,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
      case 5:
          ch += (myByte = *source);
          ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
          {
              isLegalSequence = 0;
              break;
@@ -680,7 +683,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
      case 4:
          ch += (myByte = *source);
          ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
          {
              isLegalSequence = 0;
              break;
@@ -689,7 +692,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
      case 3:
          ch += (myByte = *source);
          ch <<= 6;
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
          {
              isLegalSequence = 0;
              break;
@@ -697,7 +700,7 @@ static UChar32 ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
          ++source;
      case 2:
          ch += (myByte = *source);
-        if (!UTF8_IS_TRAIL(myByte))
+        if (!U8_IS_TRAIL(myByte))
          {
              isLegalSequence = 0;
              break;
diff --git a/icu4c/source/common/ucnvbocu.cpp b/icu4c/source/common/ucnvbocu.cpp

index 23cf92b405143097ef03972c87ad081e3fcb2884..9d6fe9a5bf4a2095e8308ea9724761ec06f7df67 100644 (file)
--- a/icu4c/source/common/ucnvbocu.cpp
+++ b/icu4c/source/common/ucnvbocu.cpp
@@ -23,6 +23,7 @@
  
  #include "unicode/ucnv.h"
  #include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
  
@@ -476,15 +477,15 @@ fastSingle:
                  continue;
              }
  
-            if(UTF_IS_LEAD(c)) {
+            if(U16_IS_LEAD(c)) {
  getTrail:
                  if(source<sourceLimit) {
                      /* test the following code unit */
                      UChar trail=*source;
-                    if(UTF_IS_SECOND_SURROGATE(trail)) {
+                    if(U16_IS_TRAIL(trail)) {
                          ++source;
                          ++nextSourceIndex;
-                        c=UTF16_GET_PAIR_VALUE(c, trail);
+                        c=U16_GET_SUPPLEMENTARY(c, trail);
                      }
                  } else {
                      /* no more input */
@@ -711,14 +712,14 @@ fastSingle:
                  continue;
              }
  
-            if(UTF_IS_LEAD(c)) {
+            if(U16_IS_LEAD(c)) {
  getTrail:
                  if(source<sourceLimit) {
                      /* test the following code unit */
                      UChar trail=*source;
-                    if(UTF_IS_SECOND_SURROGATE(trail)) {
+                    if(U16_IS_TRAIL(trail)) {
                          ++source;
-                        c=UTF16_GET_PAIR_VALUE(c, trail);
+                        c=U16_GET_SUPPLEMENTARY(c, trail);
                      }
                  } else {
                      /* no more input */
@@ -1110,15 +1111,15 @@ getTrail:
              *offsets++=sourceIndex;
          } else {
              /* output surrogate pair */
-            *target++=UTF16_LEAD(c);
+            *target++=U16_LEAD(c);
              if(target<targetLimit) {
-                *target++=UTF16_TRAIL(c);
+                *target++=U16_TRAIL(c);
                  *offsets++=sourceIndex;
                  *offsets++=sourceIndex;
              } else {
                  /* target overflow */
                  *offsets++=sourceIndex;
-                cnv->UCharErrorBuffer[0]=UTF16_TRAIL(c);
+                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
                  cnv->UCharErrorBufferLength=1;
                  *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                  break;
@@ -1315,12 +1316,12 @@ getTrail:
              *target++=(UChar)c;
          } else {
              /* output surrogate pair */
-            *target++=UTF16_LEAD(c);
+            *target++=U16_LEAD(c);
              if(target<targetLimit) {
-                *target++=UTF16_TRAIL(c);
+                *target++=U16_TRAIL(c);
              } else {
                  /* target overflow */
-                cnv->UCharErrorBuffer[0]=UTF16_TRAIL(c);
+                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
                  cnv->UCharErrorBufferLength=1;
                  *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                  break;
diff --git a/icu4c/source/common/ucnvhz.c b/icu4c/source/common/ucnvhz.c

index 074b4f4a5f3956e991706801574766821c16d03d..3c9ae7da7aed6c6ed52d24203c6570b85882a77c 100644 (file)
--- a/icu4c/source/common/ucnvhz.c
+++ b/icu4c/source/common/ucnvhz.c
@@ -1,6 +1,6 @@
  /*  
  **********************************************************************
-*   Copyright (C) 2000-2009, International Business Machines
+*   Copyright (C) 2000-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnvhz.c
@@ -22,6 +22,7 @@
  #include "unicode/ucnv.h"
  #include "unicode/ucnv_cb.h"
  #include "unicode/uset.h"
+#include "unicode/utf16.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
  #include "ucnv_imp.h"
@@ -446,17 +447,17 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
                  /* oops.. the code point is unassigned */
                  /*Handle surrogates */
                  /*check if the char is a First surrogate*/
-                if(UTF_IS_SURROGATE(mySourceChar)) {
-                    if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
+                if(U16_IS_SURROGATE(mySourceChar)) {
+                    if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
                          args->converter->fromUChar32=mySourceChar;
  getTrail:
                          /*look ahead to find the trail surrogate*/
                          if(mySourceIndex <  mySourceLength) {
                              /* test the following code unit */
                              UChar trail=(UChar) args->source[mySourceIndex];
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                  ++mySourceIndex;
-                                mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
+                                mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
                                  args->converter->fromUChar32=0x00;
                                  /* there are no surrogates in GB2312*/
                                  *err = U_INVALID_CHAR_FOUND;
diff --git a/icu4c/source/common/ucnvisci.c b/icu4c/source/common/ucnvisci.c

index 77a763d63144a172efdabe7d8429b37441684f4f..696fde4c6828a56f16586be51b26fecf4e9fb70c 100644 (file)
--- a/icu4c/source/common/ucnvisci.c
+++ b/icu4c/source/common/ucnvisci.c
@@ -19,11 +19,12 @@
  
  #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
  
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "ucnv_bld.h"
-#include "unicode/ucnv.h"
  #include "ucnv_cnv.h"
-#include "unicode/ucnv_cb.h"
  #include "cstring.h"
  
  #define UCNV_OPTIONS_VERSION_MASK 0xf
@@ -1054,16 +1055,16 @@ static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
          } else {
              /* oops.. the code point is unassigned */
              /*check if the char is a First surrogate*/
-            if (UTF_IS_SURROGATE(sourceChar)) {
-                if (UTF_IS_SURROGATE_FIRST(sourceChar)) {
+            if (U16_IS_SURROGATE(sourceChar)) {
+                if (U16_IS_SURROGATE_LEAD(sourceChar)) {
  getTrail:
                      /*look ahead to find the trail surrogate*/
                      if (source < sourceLimit) {
                          /* test the following code unit */
                          UChar trail= (*source);
-                        if (UTF_IS_SECOND_SURROGATE(trail)) {
+                        if (U16_IS_TRAIL(trail)) {
                              source++;
-                            sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                              *err =U_INVALID_CHAR_FOUND;
                              /* convert this surrogate code point */
                              /* exit this condition tree */
diff --git a/icu4c/source/common/ucnvlat1.c b/icu4c/source/common/ucnvlat1.c

index 4e682dfd1c3caa2dca0c8eca724481010c57cf63..54140ec92bd8cacf71906251edca38fb918f55d7 100644 (file)
--- a/icu4c/source/common/ucnvlat1.c
+++ b/icu4c/source/common/ucnvlat1.c
@@ -1,6 +1,6 @@
  /* 
  **********************************************************************
-*   Copyright (C) 2000-2007, International Business Machines
+*   Copyright (C) 2000-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnvlat1.cpp
@@ -18,6 +18,7 @@
  
  #include "unicode/ucnv.h"
  #include "unicode/uset.h"
+#include "unicode/utf8.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
  
diff --git a/icu4c/source/common/ucnvmbcs.c b/icu4c/source/common/ucnvmbcs.c

index b6b9c732be8b55c3bb342a6519e22bc5c92caaac..77aa0657d80a8be14f6686365f2293fde013ce68 100644 (file)
--- a/icu4c/source/common/ucnvmbcs.c
+++ b/icu4c/source/common/ucnvmbcs.c
@@ -48,6 +48,8 @@
  #include "unicode/ucnv_cb.h"
  #include "unicode/udata.h"
  #include "unicode/uset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "ucnv_bld.h"
  #include "ucnvmbcs.h"
  #include "ucnv_ext.h"
@@ -3352,16 +3354,16 @@ ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                   * If it does, then surrogates are not paired but mapped separately.
                   * Note that in this case unmatched surrogates are not detected.
                   */
-                if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
-                    if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+                    if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
                          if(source<sourceLimit) {
                              /* test the following code unit */
                              UChar trail=*source;
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                  ++source;
                                  ++nextSourceIndex;
-                                c=UTF16_GET_PAIR_VALUE(c, trail);
+                                c=U16_GET_SUPPLEMENTARY(c, trail);
                                  if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
                                      /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                      /* callback(unassigned) */
@@ -3557,16 +3559,16 @@ ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
               */
              c=*source++;
              ++nextSourceIndex;
-            if(UTF_IS_SURROGATE(c)) {
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+            if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
                      if(source<sourceLimit) {
                          /* test the following code unit */
                          UChar trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              ++source;
                              ++nextSourceIndex;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                              if(!hasSupplementary) {
                                  /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                  /* callback(unassigned) */
@@ -3805,16 +3807,16 @@ unrolled:
              /* normal end of conversion: prepare for a new character */
              c=0;
              continue;
-        } else if(!UTF_IS_SURROGATE(c)) {
+        } else if(!U16_IS_SURROGATE(c)) {
              /* normal, unassigned BMP character */
-        } else if(UTF_IS_SURROGATE_FIRST(c)) {
+        } else if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
              if(source<sourceLimit) {
                  /* test the following code unit */
                  UChar trail=*source;
-                if(UTF_IS_SECOND_SURROGATE(trail)) {
+                if(U16_IS_TRAIL(trail)) {
                      ++source;
-                    c=UTF16_GET_PAIR_VALUE(c, trail);
+                    c=U16_GET_SUPPLEMENTARY(c, trail);
                      /* this codepage does not map supplementary code points */
                      /* callback(unassigned) */
                  } else {
@@ -4235,16 +4237,16 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                   * If it does, then surrogates are not paired but mapped separately.
                   * Note that in this case unmatched surrogates are not detected.
                   */
-                if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
-                    if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
+                    if(U16_IS_SURROGATE_LEAD(c)) {
  getTrail:
                          if(source<sourceLimit) {
                              /* test the following code unit */
                              UChar trail=*source;
-                            if(UTF_IS_SECOND_SURROGATE(trail)) {
+                            if(U16_IS_TRAIL(trail)) {
                                  ++source;
                                  ++nextSourceIndex;
-                                c=UTF16_GET_PAIR_VALUE(c, trail);
+                                c=U16_GET_SUPPLEMENTARY(c, trail);
                                  if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
                                      /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                                      cnv->fromUnicodeStatus=prevLength; /* save the old state */
diff --git a/icu4c/source/common/ucnvscsu.c b/icu4c/source/common/ucnvscsu.c

index 76e973d0598aa5dd563fd06bd4c53e11dbd94b6a..cf2a0a5c51940c5a2e8316f5189d9a06c03f4a85 100644 (file)
--- a/icu4c/source/common/ucnvscsu.c
+++ b/icu4c/source/common/ucnvscsu.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2000-2009, International Business Machines
+*   Copyright (C) 2000-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -25,6 +25,7 @@
  
  #include "unicode/ucnv.h"
  #include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
  #include "ucnv_bld.h"
  #include "ucnv_cnv.h"
  #include "cmemory.h"
@@ -1098,17 +1099,17 @@ loop:
                      *offsets++=sourceIndex;
                  }
                  --targetCapacity;
-            } else if(UTF_IS_SURROGATE(c)) {
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+            } else if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
  getTrailSingle:
                      lead=(UChar)c;
                      if(source<sourceLimit) {
                          /* test the following code unit */
                          trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              ++source;
                              ++nextSourceIndex;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                              /* convert this surrogate code point */
                              /* exit this condition tree */
                          } else {
@@ -1296,16 +1297,16 @@ getTrailSingle:
                  goto outputBytes;
              } else if(c<0xe000) {
                  /* c is a surrogate */
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
  getTrailUnicode:
                      lead=(UChar)c;
                      if(source<sourceLimit) {
                          /* test the following code unit */
                          trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              ++source;
                              ++nextSourceIndex;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                              /* convert this surrogate code point */
                              /* exit this condition tree */
                          } else {
@@ -1573,16 +1574,16 @@ loop:
                  /* use the current dynamic window */
                  *target++=(uint8_t)(delta|0x80);
                  --targetCapacity;
-            } else if(UTF_IS_SURROGATE(c)) {
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+            } else if(U16_IS_SURROGATE(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
  getTrailSingle:
                      lead=(UChar)c;
                      if(source<sourceLimit) {
                          /* test the following code unit */
                          trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              ++source;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                              /* convert this surrogate code point */
                              /* exit this condition tree */
                          } else {
@@ -1758,15 +1759,15 @@ getTrailSingle:
                  goto outputBytes;
              } else if(c<0xe000) {
                  /* c is a surrogate */
-                if(UTF_IS_SURROGATE_FIRST(c)) {
+                if(U16_IS_SURROGATE_LEAD(c)) {
  getTrailUnicode:
                      lead=(UChar)c;
                      if(source<sourceLimit) {
                          /* test the following code unit */
                          trail=*source;
-                        if(UTF_IS_SECOND_SURROGATE(trail)) {
+                        if(U16_IS_TRAIL(trail)) {
                              ++source;
-                            c=UTF16_GET_PAIR_VALUE(c, trail);
+                            c=U16_GET_SUPPLEMENTARY(c, trail);
                              /* convert this surrogate code point */
                              /* exit this condition tree */
                          } else {
diff --git a/icu4c/source/common/uiter.cpp b/icu4c/source/common/uiter.cpp

index bec7190c63538ed87d1d76a5e1b09f7a745d044d..8c89d4bbe46471bf1557eb9edb6ad230edda2365 100644 (file)
--- a/icu4c/source/common/uiter.cpp
+++ b/icu4c/source/common/uiter.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2002-2006, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -19,6 +19,9 @@
  #include "unicode/chariter.h"
  #include "unicode/rep.h"
  #include "unicode/uiter.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "cstring.h"
  
  U_NAMESPACE_USE
@@ -1038,22 +1041,22 @@ uiter_current32(UCharIterator *iter) {
      UChar32 c, c2;
  
      c=iter->current(iter);
-    if(UTF_IS_SURROGATE(c)) {
-        if(UTF_IS_SURROGATE_FIRST(c)) {
+    if(U16_IS_SURROGATE(c)) {
+        if(U16_IS_SURROGATE_LEAD(c)) {
              /*
               * go to the next code unit
               * we know that we are not at the limit because c!=U_SENTINEL
               */
              iter->move(iter, 1, UITER_CURRENT);
-            if(UTF_IS_SECOND_SURROGATE(c2=iter->current(iter))) {
-                c=UTF16_GET_PAIR_VALUE(c, c2);
+            if(U16_IS_TRAIL(c2=iter->current(iter))) {
+                c=U16_GET_SUPPLEMENTARY(c, c2);
              }
  
              /* undo index movement */
              iter->move(iter, -1, UITER_CURRENT);
          } else {
-            if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
-                c=UTF16_GET_PAIR_VALUE(c2, c);
+            if(U16_IS_LEAD(c2=iter->previous(iter))) {
+                c=U16_GET_SUPPLEMENTARY(c2, c);
              }
              if(c2>=0) {
                  /* undo index movement */
@@ -1069,9 +1072,9 @@ uiter_next32(UCharIterator *iter) {
      UChar32 c, c2;
  
      c=iter->next(iter);
-    if(UTF_IS_FIRST_SURROGATE(c)) {
-        if(UTF_IS_SECOND_SURROGATE(c2=iter->next(iter))) {
-            c=UTF16_GET_PAIR_VALUE(c, c2);
+    if(U16_IS_LEAD(c)) {
+        if(U16_IS_TRAIL(c2=iter->next(iter))) {
+            c=U16_GET_SUPPLEMENTARY(c, c2);
          } else if(c2>=0) {
              /* unmatched first surrogate, undo index movement */
              iter->move(iter, -1, UITER_CURRENT);
@@ -1085,9 +1088,9 @@ uiter_previous32(UCharIterator *iter) {
      UChar32 c, c2;
  
      c=iter->previous(iter);
-    if(UTF_IS_SECOND_SURROGATE(c)) {
-        if(UTF_IS_FIRST_SURROGATE(c2=iter->previous(iter))) {
-            c=UTF16_GET_PAIR_VALUE(c2, c);
+    if(U16_IS_TRAIL(c)) {
+        if(U16_IS_LEAD(c2=iter->previous(iter))) {
+            c=U16_GET_SUPPLEMENTARY(c2, c);
          } else if(c2>=0) {
              /* unmatched second surrogate, undo index movement */
              iter->move(iter, 1, UITER_CURRENT);
diff --git a/icu4c/source/common/unames.cpp b/icu4c/source/common/unames.cpp

index ac46af88cfb65db28c113bbc0ab20ed49df26730..989a87d5a0b2f938070bf7c639e1a6fb8ec711d6 100644 (file)
--- a/icu4c/source/common/unames.cpp
+++ b/icu4c/source/common/unames.cpp
@@ -18,6 +18,8 @@
  #include "unicode/putil.h"
  #include "unicode/uchar.h"
  #include "unicode/udata.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "ustr_imp.h"
  #include "umutex.h"
  #include "cmemory.h"
@@ -442,12 +444,12 @@ compareName(UCharNames *names,
  static uint8_t getCharCat(UChar32 cp) {
      uint8_t cat;
  
-    if (UTF_IS_UNICODE_NONCHAR(cp)) {
+    if (U_IS_UNICODE_NONCHAR(cp)) {
          return U_NONCHARACTER_CODE_POINT;
      }
  
      if ((cat = u_charType(cp)) == U_SURROGATE) {
-        cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
+        cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
      }
  
      return cat;
diff --git a/icu4c/source/common/unicode/ucharstrie.h b/icu4c/source/common/unicode/ucharstrie.h

index 88d70f44c0a1b1e10c36dd9a7ff291988cb0e27c..8b0e9206f017ab857ecb07182be6f34c439c3b1d 100644 (file)
--- a/icu4c/source/common/unicode/ucharstrie.h
+++ b/icu4c/source/common/unicode/ucharstrie.h
@@ -172,13 +172,7 @@ public:
       * @return The match/value Result.
       * @draft ICU 4.8
       */
-    inline UStringTrieResult firstForCodePoint(UChar32 cp) {
-        return cp<=0xffff ?
-            first(cp) :
-            (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
-                next(U16_TRAIL(cp)) :
-                USTRINGTRIE_NO_MATCH);
-    }
+    UStringTrieResult firstForCodePoint(UChar32 cp);
  
      /**
       * Traverses the trie from the current state for this input UChar.
@@ -195,13 +189,7 @@ public:
       * @return The match/value Result.
       * @draft ICU 4.8
       */
-    inline UStringTrieResult nextForCodePoint(UChar32 cp) {
-        return cp<=0xffff ?
-            next(cp) :
-            (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
-                next(U16_TRAIL(cp)) :
-                USTRINGTRIE_NO_MATCH);
-    }
+    UStringTrieResult nextForCodePoint(UChar32 cp);
  
      /**
       * Traverses the trie from the current state for this string.
diff --git a/icu4c/source/common/unicode/uconfig.h b/icu4c/source/common/unicode/uconfig.h

index 4a9f55c0b7c8250076a4d48873f02ac7b29b570c..5da685e88cf22323e4e081c88c51d237ef3970a5 100644 (file)
--- a/icu4c/source/common/unicode/uconfig.h
+++ b/icu4c/source/common/unicode/uconfig.h
@@ -120,6 +120,24 @@
  #define U_DISABLE_RENAMING 0
  #endif
  
+/**
+ * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
+ * utypes.h includes those headers if this macro is defined to 0.
+ * Otherwise, each of those headers must be included explicitly when using one of their macros.
+ * Defaults to 0 for backward compatibility, except inside ICU.
+ * @draft ICU 49
+ */
+#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+    /* Use the predefined value. */
+#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
+      defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
+      defined(U_TOOLUTIL_IMPLEMENTATION)
+#   define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
+#else
+#   define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
+#endif
+
  /**
   * \def U_OVERRIDE_CXX_ALLOCATION
   * Determines whether to override new and delete.
diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h

index f895bab9dff25594dd07409d4a49bb7a144a193b..a95a6dd00c1319c7e2956112e7b01a7c712eb3d5 100644 (file)
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@@ -1296,7 +1296,7 @@ public:
     *         or 0xffff if the offset is not valid for this string
     * @stable ICU 2.0
     */
-  inline UChar32 char32At(int32_t offset) const;
+  UChar32 char32At(int32_t offset) const;
  
    /**
     * Adjust a random-access offset so that
@@ -1313,7 +1313,7 @@ public:
     * @see U16_SET_CP_START
     * @stable ICU 2.0
     */
-  inline int32_t getChar32Start(int32_t offset) const;
+  int32_t getChar32Start(int32_t offset) const;
  
    /**
     * Adjust a random-access offset so that
@@ -1331,7 +1331,7 @@ public:
     * @see U16_SET_CP_LIMIT
     * @stable ICU 2.0
     */
-  inline int32_t getChar32Limit(int32_t offset) const;
+  int32_t getChar32Limit(int32_t offset) const;
  
    /**
     * Move the code unit index along the string by delta code points.
@@ -2122,7 +2122,7 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& append(UChar32 srcChar);
+  UnicodeString& append(UChar32 srcChar);
  
  
    /* Insert operations */
@@ -2317,9 +2317,7 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& replace(int32_t start,
-             int32_t length,
-             UChar32 srcChar);
+  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
  
    /**
     * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
@@ -4125,17 +4123,6 @@ UnicodeString::replace(int32_t start,
                 UChar srcChar)
  { return doReplace(start, _length, &srcChar, 0, 1); }
  
-inline UnicodeString&
-UnicodeString::replace(int32_t start,
-               int32_t _length,
-               UChar32 srcChar) {
-  UChar buffer[U16_MAX_LENGTH];
-  int32_t count = 0;
-  UBool isError = FALSE;
-  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
-  return doReplace(start, _length, buffer, 0, count);
-}
-
  inline UnicodeString&
  UnicodeString::replaceBetween(int32_t start,
                    int32_t limit,
@@ -4234,43 +4221,6 @@ inline UChar
  UnicodeString::operator[] (int32_t offset) const
  { return doCharAt(offset); }
  
-inline UChar32
-UnicodeString::char32At(int32_t offset) const
-{
-  int32_t len = length();
-  if((uint32_t)offset < (uint32_t)len) {
-    const UChar *array = getArrayStart();
-    UChar32 c;
-    U16_GET(array, 0, offset, len, c);
-    return c;
-  } else {
-    return kInvalidUChar;
-  }
-}
-
-inline int32_t
-UnicodeString::getChar32Start(int32_t offset) const {
-  if((uint32_t)offset < (uint32_t)length()) {
-    const UChar *array = getArrayStart();
-    U16_SET_CP_START(array, 0, offset);
-    return offset;
-  } else {
-    return 0;
-  }
-}
-
-inline int32_t
-UnicodeString::getChar32Limit(int32_t offset) const {
-  int32_t len = length();
-  if((uint32_t)offset < (uint32_t)len) {
-    const UChar *array = getArrayStart();
-    U16_SET_CP_LIMIT(array, 0, offset, len);
-    return offset;
-  } else {
-    return len;
-  }
-}
-
  inline UBool
  UnicodeString::isEmpty() const {
    return fShortLength == 0;
@@ -4423,15 +4373,6 @@ inline UnicodeString&
  UnicodeString::append(UChar srcChar)
  { return doReplace(length(), 0, &srcChar, 0, 1); }
  
-inline UnicodeString&
-UnicodeString::append(UChar32 srcChar) {
-  UChar buffer[U16_MAX_LENGTH];
-  int32_t _length = 0;
-  UBool isError = FALSE;
-  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
-  return doReplace(length(), 0, buffer, 0, _length);
-}
-
  inline UnicodeString&
  UnicodeString::operator+= (UChar ch)
  { return doReplace(length(), 0, &ch, 0, 1); }
diff --git a/icu4c/source/common/unicode/utf.h b/icu4c/source/common/unicode/utf.h

index f79479935d4a1d6b1befcd405cd04f52381b93e7..2f646013be0304bdd78fe6f072038ee1284d25a6 100644 (file)
--- a/icu4c/source/common/unicode/utf.h
+++ b/icu4c/source/common/unicode/utf.h
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1999-2010, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -22,15 +22,20 @@
   * a surrogate or a non-character etc.
   *
   * The UChar and UChar32 data types for Unicode code units and code points
- * are defined in umachines.h because they can be machine-dependent.
+ * are defined in umachine.h because they can be machine-dependent.
   *
- * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
- * common definitions. Those files define macros for efficiently getting code points
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
+ * and itself includes utf8.h and utf16.h after some
+ * common definitions.
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
+ * included explicitly if their definitions are used.
+ *
+ * utf8.h and utf16.h define macros for efficiently getting code points
   * in and out of UTF-8/16 strings.
   * utf16.h macros have "U16_" prefixes.
   * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
   *
- * ICU processes 16-bit Unicode strings.
+ * ICU mostly processes 16-bit Unicode strings.
   * Most of the time, such strings are well-formed UTF-16.
   * Single, unpaired surrogates must be handled as well, and are treated in ICU
   * like regular code points where possible.
@@ -42,15 +47,16 @@
   * ICU functions handle supplementary code points (U+10000..U+10ffff)
   * but are optimized for the much more frequently occurring BMP code points.
   *
- * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
- * UChar is defined to be exactly wchar_t, otherwise uint16_t.
+ * umachine.h defines UChar to be an unsigned 16-bit integer.
+ * Where available, UChar is defined to be a char16_t
+ * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t.
   *
   * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
   * Unicode code point (Unicode scalar value, 0..0x10ffff).
   * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
   * the definition of UChar. For details see the documentation for UChar32 itself.
   *
- * utf.h also defines a small number of C macros for single Unicode code points.
+ * utf.h defines a small number of C macros for single Unicode code points.
   * These are simple checks for surrogates and non-characters.
   * For actual Unicode character properties see uchar.h.
   *
@@ -59,9 +65,6 @@
   * The macros will detect if a surrogate code unit is unpaired
   * (lead unit without trail unit or vice versa) and just return the unit itself
   * as the code point.
- * (It is an accidental property of Unicode and UTF-16 that all
- * malformed sequences can be expressed unambiguously with a distinct subrange
- * of Unicode code points.)
   *
   * The regular "safe" macros require that the initial, passed-in string index
   * is within bounds. They only check the index when they read more than one
@@ -95,7 +98,7 @@
   * code point values (0..U+10ffff). They are indicated with negative values instead.
   *
   * For more information see the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
+ * (http://userguide.icu-project.org/strings).
   *
   * <em>Usage:</em>
   * ICU coding guidelines for if() statements should be followed when using these macros.
@@ -108,7 +111,7 @@
  #ifndef __UTF_H__
  #define __UTF_H__
  
-#include "unicode/utypes.h"
+#include "unicode/umachine.h"
  /* include the utfXX.h after the following definitions */
  
  /* single-code point definitions -------------------------------------------- */
@@ -227,10 +230,14 @@
  
  /* include the utfXX.h ------------------------------------------------------ */
  
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+
  #include "unicode/utf8.h"
  #include "unicode/utf16.h"
  
  /* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
  #include "unicode/utf_old.h"
  
-#endif
+#endif  /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
+
+#endif  /* __UTF_H__ */
diff --git a/icu4c/source/common/unicode/utf16.h b/icu4c/source/common/unicode/utf16.h

index 5079c1146b3d347230cd698a0e1396897e4845ad..73f784cca5c5ba97624a88db54e6f2a3a793561a 100644 (file)
--- a/icu4c/source/common/unicode/utf16.h
+++ b/icu4c/source/common/unicode/utf16.h
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1999-2010, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -19,11 +19,9 @@
   * \brief C API: 16-bit Unicode handling macros
   * 
   * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
- * utf16.h is included by utf.h after unicode/umachine.h
- * and some common definitions.
   *
   * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
+ * (http://userguide.icu-project.org/strings).
   *
   * <em>Usage:</em>
   * ICU coding guidelines for if() statements should be followed when using these macros.
@@ -34,7 +32,7 @@
  #ifndef __UTF16_H__
  #define __UTF16_H__
  
-/* utf.h must be included first. */
+#include "unicode/umachine.h"
  #ifndef __UTF_H__
  #   include "unicode/utf.h"
  #endif
diff --git a/icu4c/source/common/unicode/utf8.h b/icu4c/source/common/unicode/utf8.h

index 6405795a5a1c53698846d11015699b61c3102224..8318c7bb0d2e3e93f110fd80cf9a126607ce9b9f 100644 (file)
--- a/icu4c/source/common/unicode/utf8.h
+++ b/icu4c/source/common/unicode/utf8.h
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -19,11 +19,9 @@
   * \brief C API: 8-bit Unicode handling macros
   * 
   * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
- * utf8.h is included by utf.h after unicode/umachine.h
- * and some common definitions.
   *
   * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
+ * (http://userguide.icu-project.org/strings).
   *
   * <em>Usage:</em>
   * ICU coding guidelines for if() statements should be followed when using these macros.
@@ -34,7 +32,7 @@
  #ifndef __UTF8_H__
  #define __UTF8_H__
  
-/* utf.h must be included first. */
+#include "unicode/umachine.h"
  #ifndef __UTF_H__
  #   include "unicode/utf.h"
  #endif
diff --git a/icu4c/source/common/unicode/utf_old.h b/icu4c/source/common/unicode/utf_old.h

index b360b34ae12bf8515cb01c1e6fa7a8fa104053c7..38d13efb6d9467c5c65ae5944c4d55db3b6e7e35 100644 (file)
--- a/icu4c/source/common/unicode/utf_old.h
+++ b/icu4c/source/common/unicode/utf_old.h
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2002-2008, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -28,9 +28,6 @@
   * If you are looking for a new equivalent to an old macro, please see the
   * comment at the old one.
   *
- * utf_old.h is included by utf.h after unicode/umachine.h
- * and some common definitions, to not break old code.
- *
   * Brief summary of reasons for deprecation:
   * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
   *   was impractical.
@@ -148,10 +145,9 @@
  
  #ifndef U_HIDE_DEPRECATED_API
  
-/* utf.h must be included first. */
-#ifndef __UTF_H__
-#   include "unicode/utf.h"
-#endif
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  
  /* Formerly utf.h, part 1 --------------------------------------------------- */
  
diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h

index f9641c677a767f35e84955597a289d538aa1e893..b025f548ceb06a74e0c3561be05e22db0bff6854 100644 (file)
--- a/icu4c/source/common/unicode/utypes.h
+++ b/icu4c/source/common/unicode/utypes.h
@@ -34,10 +34,13 @@
  
  
  #include "unicode/umachine.h"
-#include "unicode/utf.h"
  #include "unicode/uversion.h"
  #include "unicode/uconfig.h"
-#include "float.h"
+#include <float.h>
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+#   include "unicode/utf.h"
+#endif
  
  /*!
   * \file
@@ -238,7 +241,7 @@ typedef double UDate;
   * ICU is separated into three libraries.
   */
  
-/*
+/**
   * \def U_COMBINED_IMPLEMENTATION
   * Set to export library symbols from inside the ICU library
   * when all of ICU is in a single library.
diff --git a/icu4c/source/common/unifilt.cpp b/icu4c/source/common/unifilt.cpp

index 69ac3070c79ac4676c0f44a2f04a4df0fc484717..856e5b972c0f0930a0a01a97cafd56ad8d308792 100644 (file)
--- a/icu4c/source/common/unifilt.cpp
+++ b/icu4c/source/common/unifilt.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-* Copyright (c) 2001-2004, International Business Machines
+* Copyright (c) 2001-2011, International Business Machines
  * Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -10,6 +10,7 @@
  
  #include "unicode/unifilt.h"
  #include "unicode/rep.h"
+#include "unicode/utf16.h"
  
  U_NAMESPACE_BEGIN
  UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter)
@@ -43,7 +44,7 @@ UMatchDegree UnicodeFilter::matches(const Replaceable& text,
      UChar32 c;
      if (offset < limit &&
          contains(c = text.char32At(offset))) {
-        offset += UTF_CHAR_LENGTH(c);
+        offset += U16_LENGTH(c);
          return U_MATCH;
      }
      if (offset > limit &&
@@ -53,7 +54,7 @@ UMatchDegree UnicodeFilter::matches(const Replaceable& text,
          // the lead surrogate).
          --offset;
          if (offset >= 0) {
-            offset -= UTF_CHAR_LENGTH(text.char32At(offset)) - 1;
+            offset -= U16_LENGTH(text.char32At(offset)) - 1;
          }
          return U_MATCH;
      }
diff --git a/icu4c/source/common/uniset.cpp b/icu4c/source/common/uniset.cpp

index bc204bfef669ede4a31eb651a4aa7e7a1aececc3..f1b3e31a20cbea7ae9a61149ce8a8f3505116580 100644 (file)
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@@ -9,9 +9,11 @@
  */
  
  #include "unicode/utypes.h"
-#include "unicode/uniset.h"
  #include "unicode/parsepos.h"
  #include "unicode/symtable.h"
+#include "unicode/uniset.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "ruleiter.h"
  #include "cmemory.h"
  #include "cstring.h"
@@ -1059,7 +1061,7 @@ int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
   */
  UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
      UChar32 cp;
-    for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
+    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
          cp = s.char32At(i);
          add(cp);
      }
@@ -1892,7 +1894,7 @@ void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity)
  void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool
  escapeUnprintable) {
      UChar32 cp;
-    for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
+    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
          _appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
      }
  }
diff --git a/icu4c/source/common/unisetspan.cpp b/icu4c/source/common/unisetspan.cpp

index 0e43dfc3f7568920fe69b24e2a1fab41a037954a..90a9fc47497a146ab9ee52e9f78918a60c7c5a62 100644 (file)
--- a/icu4c/source/common/unisetspan.cpp
+++ b/icu4c/source/common/unisetspan.cpp
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2007, International Business Machines
+*   Copyright (C) 2007-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -17,6 +17,8 @@
  #include "unicode/utypes.h"
  #include "unicode/uniset.h"
  #include "unicode/ustring.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "uvector.h"
  #include "unisetspan.h"
diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp

index d66edb53fd90d688b885837eda18c5a43b4ae7db..7511a1ae7d63a68195cca64f5ab969497896b09a 100644 (file)
--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@@ -25,6 +25,8 @@
  #include "cmemory.h"
  #include "unicode/ustring.h"
  #include "unicode/unistr.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "uelement.h"
  #include "ustr_imp.h"
  #include "umutex.h"
@@ -158,7 +160,7 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
      allocate(capacity);
    } else {
      // count > 0, allocate and fill the new string with count c's
-    int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
+    int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
      if(capacity < length) {
        capacity = length;
      }
@@ -174,8 +176,8 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
          }
        } else {
          // get the code units for c
-        UChar units[UTF_MAX_CHAR_LENGTH];
-        UTF_APPEND_CHAR_UNSAFE(units, i, c);
+        UChar units[U16_MAX_LENGTH];
+        U16_APPEND_UNSAFE(units, i, c);
  
          // now it must be i==unitCount
          i = 0;
@@ -686,6 +688,43 @@ UnicodeString::getChar32At(int32_t offset) const {
    return char32At(offset);
  }
  
+UChar32
+UnicodeString::char32At(int32_t offset) const
+{
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
+    UChar32 c;
+    U16_GET(array, 0, offset, len, c);
+    return c;
+  } else {
+    return kInvalidUChar;
+  }
+}
+
+int32_t
+UnicodeString::getChar32Start(int32_t offset) const {
+  if((uint32_t)offset < (uint32_t)length()) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_START(array, 0, offset);
+    return offset;
+  } else {
+    return 0;
+  }
+}
+
+int32_t
+UnicodeString::getChar32Limit(int32_t offset) const {
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_LIMIT(array, 0, offset, len);
+    return offset;
+  } else {
+    return len;
+  }
+}
+
  int32_t
  UnicodeString::countChar32(int32_t start, int32_t length) const {
    pinIndices(start, length);
@@ -712,9 +751,9 @@ UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
  
    const UChar *array = getArrayStart();
    if(delta>0) {
-    UTF_FWD_N(array, index, len, delta);
+    U16_FWD_N(array, index, len, delta);
    } else {
-    UTF_BACK_N(array, 0, index, -delta);
+    U16_BACK_N(array, 0, index, -delta);
    }
  
    return index;
@@ -1196,6 +1235,26 @@ UnicodeString::setCharAt(int32_t offset,
    return *this;
  }
  
+UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               UChar32 srcChar) {
+  UChar buffer[U16_MAX_LENGTH];
+  int32_t count = 0;
+  UBool isError = FALSE;
+  U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
+  return doReplace(start, _length, buffer, 0, count);
+}
+
+UnicodeString&
+UnicodeString::append(UChar32 srcChar) {
+  UChar buffer[U16_MAX_LENGTH];
+  int32_t _length = 0;
+  UBool isError = FALSE;
+  U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
+  return doReplace(length(), 0, buffer, 0, _length);
+}
+
  UnicodeString&
  UnicodeString::doReplace( int32_t start,
                int32_t length,
diff --git a/icu4c/source/common/unistr_props.cpp b/icu4c/source/common/unistr_props.cpp

index 7670de465548abfd0ceda76a3d4d2a4fb34a3014..b49a4b4be6d0bbb401d0d6d062a01a4e321cb6bf 100644 (file)
--- a/icu4c/source/common/unistr_props.cpp
+++ b/icu4c/source/common/unistr_props.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1999-2007, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -19,6 +19,7 @@
  #include "unicode/utypes.h"
  #include "unicode/uchar.h"
  #include "unicode/unistr.h"
+#include "unicode/utf16.h"
  
  U_NAMESPACE_BEGIN
  
diff --git a/icu4c/source/common/unorm_it.c b/icu4c/source/common/unorm_it.c

index 00396bd6daa4f76f07a58f5483bd0c1de132969f..3adb1ce07ed858802b6e4c823775819df664118b 100644 (file)
--- a/icu4c/source/common/unorm_it.c
+++ b/icu4c/source/common/unorm_it.c
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2003-2008, International Business Machines
+*   Copyright (C) 2003-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -20,6 +20,7 @@
  
  #include "unicode/uiter.h"
  #include "unicode/unorm.h"
+#include "unicode/utf.h"
  #include "unorm_it.h"
  #include "cmemory.h"
  
diff --git a/icu4c/source/common/uresdata.c b/icu4c/source/common/uresdata.c

index 456f1a3740fa917a1910051e20483fe5e19e0dca..6ea0b97fcde75984c6f0aa674549d4587260068c 100644 (file)
--- a/icu4c/source/common/uresdata.c
+++ b/icu4c/source/common/uresdata.c
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *                                                                             *
-* Copyright (C) 1999-2010, International Business Machines Corporation        *
+* Copyright (C) 1999-2011, International Business Machines Corporation        *
  *               and others. All Rights Reserved.                              *
  *                                                                             *
  *******************************************************************************
@@ -22,6 +22,7 @@
  #include "unicode/utypes.h"
  #include "unicode/udata.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "uarrsort.h"
diff --git a/icu4c/source/common/ushape.cpp b/icu4c/source/common/ushape.cpp

index c34f86dc0bee3a6310d861a587fb74862bde22cb..c90ee73a8a96c3254b881cdc12076a8605f56e88 100644 (file)
--- a/icu4c/source/common/ushape.cpp
+++ b/icu4c/source/common/ushape.cpp
@@ -27,17 +27,13 @@
  
  #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  
-#if UTF_SIZE<16
-    /*
-     * This implementation assumes that the internal encoding is UTF-16
-     * or UTF-32, not UTF-8.
-     * The main assumption is that the Arabic characters and their
-     * presentation forms each fit into a single UChar.
-     * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
-     * characters.
-     */
-#   error This implementation assumes UTF-16 or UTF-32 (check UTF_SIZE)
-#endif
+/*
+ * This implementation is designed for 16-bit Unicode strings.
+ * The main assumption is that the Arabic characters and their
+ * presentation forms each fit into a single UChar.
+ * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
+ * characters.
+ */
  
  /*
   * ### TODO in general for letter shaping:
diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp

index 17468c9d493d5ebffd93c06f3cb62cc05976c456..b0b4ef7c04ea32e767e40e644067c66b499bc527 100644 (file)
--- a/icu4c/source/common/ustrcase.cpp
+++ b/icu4c/source/common/ustrcase.cpp
@@ -23,6 +23,8 @@
  #include "unicode/ustring.h"
  #include "unicode/ucasemap.h"
  #include "unicode/ubrk.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "ucase.h"
  #include "ustr_imp.h"
diff --git a/icu4c/source/common/ustring.cpp b/icu4c/source/common/ustring.cpp

index 297b095d2d765f89405ff1759afb3449db85e4f9..3210cfc215e2ec4d59c039beee5bd9a5d3baa67b 100644 (file)
--- a/icu4c/source/common/ustring.cpp
+++ b/icu4c/source/common/ustring.cpp
@@ -18,6 +18,7 @@
  #include "unicode/utypes.h"
  #include "unicode/putil.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "cstring.h"
  #include "cwchar.h"
  #include "cmemory.h"
@@ -791,8 +792,8 @@ uprv_strCompare(const UChar *s1, int32_t length1,
      if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
          /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
          if(
-            (c1<=0xdbff && (s1+1)!=limit1 && UTF_IS_TRAIL(*(s1+1))) ||
-            (UTF_IS_TRAIL(c1) && start1!=s1 && UTF_IS_LEAD(*(s1-1)))
+            (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
+            (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
          ) {
              /* part of a surrogate pair, leave >=d800 */
          } else {
@@ -801,8 +802,8 @@ uprv_strCompare(const UChar *s1, int32_t length1,
          }
  
          if(
-            (c2<=0xdbff && (s2+1)!=limit2 && UTF_IS_TRAIL(*(s2+1))) ||
-            (UTF_IS_TRAIL(c2) && start2!=s2 && UTF_IS_LEAD(*(s2-1)))
+            (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
+            (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
          ) {
              /* part of a surrogate pair, leave >=d800 */
          } else {
@@ -853,8 +854,8 @@ u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrde
      if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
          /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
          if(
-            (c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
-            (UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
+            (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
+            (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
          ) {
              /* part of a surrogate pair, leave >=d800 */
          } else {
@@ -863,8 +864,8 @@ u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrde
          }
  
          if(
-            (c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
-            (UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
+            (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
+            (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
          ) {
              /* part of a surrogate pair, leave >=d800 */
          } else {
@@ -897,14 +898,14 @@ u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrde
  void fragment {
          /* iff a surrogate is part of a surrogate pair, leave >=d800 */
          if(c1<=0xdbff) {
-            if(!UTF_IS_TRAIL(iter1->current(iter1))) {
+            if(!U16_IS_TRAIL(iter1->current(iter1))) {
                  /* lead surrogate code point - make <d800 */
                  c1-=0x2800;
              }
          } else if(c1<=0xdfff) {
              int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
              iter1->previous(iter1); /* ==c1 */
-            if(!UTF_IS_LEAD(iter1->previous(iter1))) {
+            if(!U16_IS_LEAD(iter1->previous(iter1))) {
                  /* trail surrogate code point - make <d800 */
                  c1-=0x2800;
              }
@@ -1012,7 +1013,7 @@ u_countChar32(const UChar *s, int32_t length) {
      if(length>=0) {
          while(length>0) {
              ++count;
-            if(UTF_IS_LEAD(*s) && length>=2 && UTF_IS_TRAIL(*(s+1))) {
+            if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
                  s+=2;
                  length-=2;
              } else {
@@ -1033,7 +1034,7 @@ u_countChar32(const UChar *s, int32_t length) {
               * sufficient to look ahead one because of UTF-16;
               * safe to look ahead one because at worst that would be the terminating NUL
               */
-            if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(*s)) {
+            if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
                  ++s;
              }
          }
@@ -1306,11 +1307,11 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
      /* Map \cX to control-X: X & 0x1F */
      if (c == 0x0063 /*'c'*/ && *offset < length) {
          c = charAt((*offset)++, context);
-        if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
+        if (U16_IS_LEAD(c) && *offset < length) {
              UChar c2 = charAt(*offset, context);
-            if (UTF_IS_SECOND_SURROGATE(c2)) {
+            if (U16_IS_TRAIL(c2)) {
                  ++(*offset);
-                c = (UChar) UTF16_GET_PAIR_VALUE(c, c2); /* [sic] */
+                c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
              }
          }
          return 0x1F & c;
@@ -1319,11 +1320,11 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
      /* If no special forms are recognized, then consider
       * the backslash to generically escape the next character.
       * Deal with surrogate pairs. */
-    if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
+    if (U16_IS_LEAD(c) && *offset < length) {
          UChar c2 = charAt(*offset, context);
-        if (UTF_IS_SECOND_SURROGATE(c2)) {
+        if (U16_IS_TRAIL(c2)) {
              ++(*offset);
-            return UTF16_GET_PAIR_VALUE(c, c2);
+            return U16_GET_SUPPLEMENTARY(c, c2);
          }
      }
      return c;
@@ -1383,10 +1384,10 @@ u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
                  goto err;
              }
              src += lenParsed; /* advance past escape seq. */
-            if (dest != NULL && UTF_CHAR_LENGTH(c32) <= (destCapacity - i)) {
-                UTF_APPEND_CHAR_UNSAFE(dest, i, c32);
+            if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
+                U16_APPEND_UNSAFE(dest, i, c32);
              } else {
-                i += UTF_CHAR_LENGTH(c32);
+                i += U16_LENGTH(c32);
              }
              segment = src;
          } else {
diff --git a/icu4c/source/common/ustrtrns.cpp b/icu4c/source/common/ustrtrns.cpp

index 6c17b661202aeeae90f3508e1c50ff9a1149e06a..beecd91506c5034c960a8c35fb0798f9e3d6b7aa 100644 (file)
--- a/icu4c/source/common/ustrtrns.cpp
+++ b/icu4c/source/common/ustrtrns.cpp
@@ -26,6 +26,9 @@
  
  #include "unicode/putil.h"
  #include "unicode/ustring.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "cstring.h"
  #include "cmemory.h"
  #include "ustr_imp.h"
@@ -268,8 +271,8 @@ static UChar32
  utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
      const uint8_t *s=*ps;
      uint8_t trail, illegal=0;
-    uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
-    UTF8_MASK_LEAD_BYTE((c), count);
+    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
+    U8_MASK_LEAD_BYTE((c), count);
      /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
      switch(count) {
      /* each branch falls through to the next one */
@@ -309,11 +312,11 @@ utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
  
      /* correct sequence - all trail bytes have (b7..b6)==(10)? */
      /* illegal is also set if count>=4 */
-    if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
+    if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
          /* error handling */
          /* don't go beyond this sequence */
          s=*ps;
-        while(count>0 && UTF8_IS_TRAIL(*s)) {
+        while(count>0 && U8_IS_TRAIL(*s)) {
              ++s;
              --count;
          }
@@ -336,9 +339,9 @@ static UChar32
  utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
      const uint8_t *s=*ps;
      uint8_t trail, illegal=0;
-    uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
+    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
      if((limit-s)>=count) {
-        UTF8_MASK_LEAD_BYTE((c), count);
+        U8_MASK_LEAD_BYTE((c), count);
          /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
          switch(count) {
          /* each branch falls through to the next one */
@@ -376,11 +379,11 @@ utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c
  
      /* correct sequence - all trail bytes have (b7..b6)==(10)? */
      /* illegal is also set if count>=4 */
-    if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
+    if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
          /* error handling */
          /* don't go beyond this sequence */
          s=*ps;
-        while(count>0 && s<limit && UTF8_IS_TRAIL(*s)) {
+        while(count>0 && s<limit && U8_IS_TRAIL(*s)) {
              ++s;
              --count;
          }
@@ -479,9 +482,9 @@ u_strFromUTF8WithSub(UChar *dest,
                  } else if(ch<=0xFFFF) {
                      *(pDest++)=(UChar)ch;
                  } else {
-                    *(pDest++)=UTF16_LEAD(ch);
+                    *(pDest++)=U16_LEAD(ch);
                      if(pDest<pDestLimit) {
-                        *(pDest++)=UTF16_TRAIL(ch);
+                        *(pDest++)=U16_TRAIL(ch);
                      } else {
                          reqLength++;
                          break;
@@ -600,8 +603,8 @@ u_strFromUTF8WithSub(UChar *dest,
                      }else if(ch<=0xFFFF){
                          *(pDest++)=(UChar)ch;
                      }else{
-                        *(pDest++)=UTF16_LEAD(ch);
-                        *(pDest++)=UTF16_TRAIL(ch);
+                        *(pDest++)=U16_LEAD(ch);
+                        *(pDest++)=U16_TRAIL(ch);
                      }
                  }
              } while(--count > 0);
@@ -646,9 +649,9 @@ u_strFromUTF8WithSub(UChar *dest,
                  }else if(ch<=0xFFFF){
                      *(pDest++)=(UChar)ch;
                  }else{
-                    *(pDest++)=UTF16_LEAD(ch);
+                    *(pDest++)=U16_LEAD(ch);
                      if(pDest<pDestLimit){
-                        *(pDest++)=UTF16_TRAIL(ch);
+                        *(pDest++)=U16_TRAIL(ch);
                      }else{
                          reqLength++;
                          break;
@@ -693,7 +696,7 @@ u_strFromUTF8WithSub(UChar *dest,
                      *pErrorCode = U_INVALID_CHAR_FOUND;
                      return NULL;
                  }
-                reqLength+=UTF_CHAR_LENGTH(ch);
+                reqLength+=U16_LENGTH(ch);
              }
          }
      }
@@ -1029,10 +1032,10 @@ u_strToUTF8WithSub(char *dest,
              } else /* ch is a surrogate */ {
                  int32_t length;
  
-                /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
-                if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { 
+                /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
+                if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { 
                      ++pSrc;
-                    ch=UTF16_GET_PAIR_VALUE(ch, ch2);
+                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
                  } else if(subchar>=0) {
                      ch=subchar;
                      ++numSubstitutions;
@@ -1057,9 +1060,9 @@ u_strToUTF8WithSub(char *dest,
                  ++reqLength;
              } else if(ch<=0x7ff) {
                  reqLength+=2;
-            } else if(!UTF_IS_SURROGATE(ch)) {
+            } else if(!U16_IS_SURROGATE(ch)) {
                  reqLength+=3;
-            } else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
+            } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
                  ++pSrc;
                  reqLength+=4;
              } else if(subchar>=0) {
@@ -1117,9 +1120,9 @@ u_strToUTF8WithSub(char *dest,
                          break;  /* recompute count */
                      }
  
-                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { 
+                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { 
                          ++pSrc;
-                        ch=UTF16_GET_PAIR_VALUE(ch, ch2);
+                        ch=U16_GET_SUPPLEMENTARY(ch, ch2);
  
                          /* writing 4 bytes per 2 UChars is ok */
                          *pDest++=(uint8_t)((ch>>18)|0xf0);
@@ -1172,9 +1175,9 @@ u_strToUTF8WithSub(char *dest,
              } else /* ch is a surrogate */ {
                  int32_t length;
  
-                if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { 
+                if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) { 
                      ++pSrc;
-                    ch=UTF16_GET_PAIR_VALUE(ch, ch2);
+                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
                  } else if(subchar>=0) {
                      ch=subchar;
                      ++numSubstitutions;
@@ -1200,9 +1203,9 @@ u_strToUTF8WithSub(char *dest,
                  ++reqLength;
              } else if(ch<=0x7ff) {
                  reqLength+=2;
-            } else if(!UTF_IS_SURROGATE(ch)) {
+            } else if(!U16_IS_SURROGATE(ch)) {
                  reqLength+=3;
-            } else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
+            } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
                  ++pSrc;
                  reqLength+=4;
              } else if(subchar>=0) {
diff --git a/icu4c/source/common/utext.cpp b/icu4c/source/common/utext.cpp

index 10ebc3296b3fb83ab73900c797b02005f8fb11c7..30bfdeaa2caac3c304212fc376cdd5bb3922ef4e 100644 (file)
--- a/icu4c/source/common/utext.cpp
+++ b/icu4c/source/common/utext.cpp
@@ -19,6 +19,9 @@
  #include "unicode/unistr.h"
  #include "unicode/chariter.h"
  #include "unicode/utext.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "ustr_imp.h"
  #include "cmemory.h"
  #include "cstring.h"
@@ -118,13 +121,13 @@ utext_setNativeIndex(UText *ut, int64_t index) {
      // Adjust the index position if it is in the middle of a surrogate pair.
      if (ut->chunkOffset<ut->chunkLength) {
          UChar c= ut->chunkContents[ut->chunkOffset];
-        if (UTF16_IS_TRAIL(c)) {
+        if (U16_IS_TRAIL(c)) {
              if (ut->chunkOffset==0) {
                  ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
              }
              if (ut->chunkOffset>0) {
                  UChar lead = ut->chunkContents[ut->chunkOffset-1];
-                if (UTF16_IS_LEAD(lead)) {
+                if (U16_IS_LEAD(lead)) {
                      ut->chunkOffset--;
                  }
              }
@@ -1768,9 +1771,9 @@ utext_strFromUTF8(UChar *dest,
              if(U_IS_BMP(ch)){
                  *(pDest++)=(UChar)ch;
              }else{
-                *(pDest++)=UTF16_LEAD(ch);
+                *(pDest++)=U16_LEAD(ch);
                  if(pDest<pDestLimit){
-                    *(pDest++)=UTF16_TRAIL(ch);
+                    *(pDest++)=U16_TRAIL(ch);
                  }else{
                      reqLength++;
                      break;
diff --git a/icu4c/source/common/utf_impl.c b/icu4c/source/common/utf_impl.c

index 8f45546bb767dc25c9440e88d99cf3ae38f6884f..982ce0655933f3bd8ce43840914780bbeddc3e09 100644 (file)
--- a/icu4c/source/common/utf_impl.c
+++ b/icu4c/source/common/utf_impl.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 1999-2006, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -23,6 +23,9 @@
  #endif
  
  #include "unicode/utypes.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf_old.h"
  
  /*
   * This table could be replaced on many machines by
@@ -107,11 +110,11 @@ utf8_errorValue[6]={
  U_CAPI UChar32 U_EXPORT2
  utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) {
      int32_t i=*pi;
-    uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
+    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
      if((i)+count<=(length)) {
          uint8_t trail, illegal=0;
  
-        UTF8_MASK_LEAD_BYTE((c), count);
+        U8_MASK_LEAD_BYTE((c), count);
          /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
          switch(count) {
          /* each branch falls through to the next one */
@@ -161,12 +164,12 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
  
          /* correct sequence - all trail bytes have (b7..b6)==(10)? */
          /* illegal is also set if count>=4 */
-        if(illegal || (c)<utf8_minLegal[count] || (UTF_IS_SURROGATE(c) && strict!=-2)) {
+        if(illegal || (c)<utf8_minLegal[count] || (U_IS_SURROGATE(c) && strict!=-2)) {
              /* error handling */
              uint8_t errorCount=count;
              /* don't go beyond this sequence */
              i=*pi;
-            while(count>0 && UTF8_IS_TRAIL(s[i])) {
+            while(count>0 && U8_IS_TRAIL(s[i])) {
                  ++(i);
                  --count;
              }
@@ -175,7 +178,7 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
              } else {
                  c=U_SENTINEL;
              }
-        } else if((strict)>0 && UTF_IS_UNICODE_NONCHAR(c)) {
+        } else if((strict)>0 && U_IS_UNICODE_NONCHAR(c)) {
              /* strict: forbid non-characters like U+fffe */
              c=utf8_errorValue[count];
          }
@@ -183,7 +186,7 @@ utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c,
          /* error handling */
          int32_t i0=i;
          /* don't just set (i)=(length) in case there is an illegal sequence */
-        while((i)<(length) && UTF8_IS_TRAIL(s[i])) {
+        while((i)<(length) && U8_IS_TRAIL(s[i])) {
              ++(i);
          }
          if(strict>=0) {
@@ -265,14 +268,14 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
          if((uint8_t)(b-0x80)<0x7e) { /* 0x80<=b<0xfe */
              if(b&0x40) {
                  /* lead byte, this will always end the loop */
-                uint8_t shouldCount=UTF8_COUNT_TRAIL_BYTES(b);
+                uint8_t shouldCount=U8_COUNT_TRAIL_BYTES(b);
  
                  if(count==shouldCount) {
                      /* set the new position */
                      *pi=i;
-                    UTF8_MASK_LEAD_BYTE(b, count);
+                    U8_MASK_LEAD_BYTE(b, count);
                      c|=(UChar32)b<<shift;
-                    if(count>=4 || c>0x10ffff || c<utf8_minLegal[count] || (UTF_IS_SURROGATE(c) && strict!=-2) || (strict>0 && UTF_IS_UNICODE_NONCHAR(c))) {
+                    if(count>=4 || c>0x10ffff || c<utf8_minLegal[count] || (U_IS_SURROGATE(c) && strict!=-2) || (strict>0 && U_IS_UNICODE_NONCHAR(c))) {
                          /* illegal sequence or (strict and non-character) */
                          if(count>=4) {
                              count=3;
@@ -351,7 +354,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
          if((uint8_t)(b-0x80)>=0x7e) { /* not 0x80<=b<0xfe */
              break;
          } else if(b>=0xc0) {
-            if(UTF8_COUNT_TRAIL_BYTES(b)>=(i-I)) {
+            if(U8_COUNT_TRAIL_BYTES(b)>=(i-I)) {
                  return I;
              } else {
                  break;
diff --git a/icu4c/source/common/util.cpp b/icu4c/source/common/util.cpp

index e67ef4642d19484671c059f8a7f380227e8f2a8d..acb15854a9e3e26d23436357f0800dd112521710 100644 (file)
--- a/icu4c/source/common/util.cpp
+++ b/icu4c/source/common/util.cpp
@@ -9,6 +9,7 @@
  */
  
  #include "unicode/unimatch.h"
+#include "unicode/utf16.h"
  #include "patternprops.h"
  #include "util.h"
  
@@ -170,9 +171,9 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos,
  //?    while (pos != stop &&
  //?           PatternProps::isWhiteSpace(c = text.char32At(pos))) {
  //?        if (isForward) {
-//?            pos += UTF_CHAR_LENGTH(c);
+//?            pos += U16_LENGTH(c);
  //?        } else {
-//?            pos -= UTF_CHAR_LENGTH(c);
+//?            pos -= U16_LENGTH(c);
  //?        }
  //?    }
  //?
@@ -242,7 +243,7 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
          // parse \s*
          if (cpat == 126 /*~*/) {
              if (PatternProps::isWhiteSpace(c)) {
-                index += UTF_CHAR_LENGTH(c);
+                index += U16_LENGTH(c);
                  continue;
              } else {
                  if (++ipat == pat.length()) {
@@ -254,8 +255,8 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat,
  
          // parse literal
          else if (c == cpat) {
-            index += UTF_CHAR_LENGTH(c);
-            ipat += UTF_CHAR_LENGTH(cpat);
+            index += U16_LENGTH(c);
+            ipat += U16_LENGTH(cpat);
              if (ipat == pat.length()) {
                  return index; // success; c parsed
              }
diff --git a/icu4c/source/common/util_props.cpp b/icu4c/source/common/util_props.cpp

index 2ee536b9b227c3034c84872b7ac390b5ef634645..1bdaaddb71afcce1d09ada5726ca5e4b8aa3d80e 100644 (file)
--- a/icu4c/source/common/util_props.cpp
+++ b/icu4c/source/common/util_props.cpp
@@ -9,6 +9,7 @@
  */
  
  #include "unicode/uchar.h"
+#include "unicode/utf16.h"
  #include "patternprops.h"
  #include "util.h"
  
@@ -159,7 +160,7 @@ UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int3
                  break;
              }
          }
-        p += UTF_CHAR_LENGTH(ch);
+        p += U16_LENGTH(ch);
      }
      pos = p;
      return buf;
diff --git a/icu4c/source/common/utrie.h b/icu4c/source/common/utrie.h

index 2a9dc0db39f109653c1474cbd3e9f3423f00465a..3cec027fe34595e5b75e1f2850d206b62a94ef87 100644 (file)
--- a/icu4c/source/common/utrie.h
+++ b/icu4c/source/common/utrie.h
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2001-2008, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -18,6 +18,7 @@
  #define __UTRIE_H__
  
  #include "unicode/utypes.h"
+#include "unicode/utf16.h"
  #include "udataswp.h"
  
  U_CDECL_BEGIN
@@ -210,7 +211,7 @@ typedef struct UTrie UTrie;
          (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
      } else if((uint32_t)(c32)<=0x10ffff) { \
          /* supplementary code point */ \
-        UChar __lead16=UTF16_LEAD(c32); \
+        UChar __lead16=U16_LEAD(c32); \
          _UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
      } else { \
          /* out of range */ \
@@ -220,10 +221,10 @@ typedef struct UTrie UTrie;
  /** Internal next-post-increment: get the next code point (c, c2) and its data */
  #define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \
      (c)=*(src)++; \
-    if(!UTF_IS_LEAD(c)) { \
+    if(!U16_IS_LEAD(c)) { \
          (c2)=0; \
          (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
-    } else if((src)!=(limit) && UTF_IS_TRAIL((c2)=*(src))) { \
+    } else if((src)!=(limit) && U16_IS_TRAIL((c2)=*(src))) { \
          ++(src); \
          _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
      } else { \
@@ -236,12 +237,12 @@ typedef struct UTrie UTrie;
  /** Internal previous: get the previous code point (c, c2) and its data */
  #define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \
      (c)=*--(src); \
-    if(!UTF_IS_SURROGATE(c)) { \
+    if(!U16_IS_SURROGATE(c)) { \
          (c2)=0; \
          (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
-    } else if(!UTF_IS_SURROGATE_FIRST(c)) { \
+    } else if(!U16_IS_SURROGATE_LEAD(c)) { \
          /* trail surrogate */ \
-        if((start)!=(src) && UTF_IS_LEAD((c2)=*((src)-1))) { \
+        if((start)!=(src) && U16_IS_LEAD((c2)=*((src)-1))) { \
              --(src); \
              (result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \
              _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
diff --git a/icu4c/source/common/utrie2.cpp b/icu4c/source/common/utrie2.cpp

index e07860ef9254f86cc87389167e71b2e903421ca0..40650fe6b4da7a3f9a3b21b2580b5cbc6000a132 100644 (file)
--- a/icu4c/source/common/utrie2.cpp
+++ b/icu4c/source/common/utrie2.cpp
@@ -27,6 +27,9 @@
  #endif
  
  #include "unicode/utypes.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "utrie2.h"
  #include "utrie2_impl.h"
diff --git a/icu4c/source/common/utrie2.h b/icu4c/source/common/utrie2.h

index fa8c8f3aa284146308f523553961e5bf0fce58e3..439d0d3dfe4465e543d1535b3b1db0b939abf457 100644 (file)
--- a/icu4c/source/common/utrie2.h
+++ b/icu4c/source/common/utrie2.h
@@ -620,6 +620,7 @@ U_CDECL_END
  
  #ifdef __cplusplus
  
+#include "unicode/utf.h"
  #include "mutex.h"
  
  U_NAMESPACE_BEGIN
diff --git a/icu4c/source/common/uts46.cpp b/icu4c/source/common/uts46.cpp

index aa9b1a6fe7dd8e22da97c293673a5a32042c018d..70a97845e150b2cee689b3e724840f98a1572ad6 100644 (file)
--- a/icu4c/source/common/uts46.cpp
+++ b/icu4c/source/common/uts46.cpp
@@ -20,6 +20,7 @@
  #include "unicode/normalizer2.h"
  #include "unicode/uscript.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "punycode.h"
diff --git a/icu4c/source/i18n/bocsu.cpp b/icu4c/source/i18n/bocsu.cpp

index 24ff1a46c2d6c1353e6e10e84e51e2cafae5e172..6e771a88b8eb9585d5f7ca2d8ff4a9e6f3c852e3 100644 (file)
--- a/icu4c/source/i18n/bocsu.cpp
+++ b/icu4c/source/i18n/bocsu.cpp
@@ -16,10 +16,11 @@
  
  
  #include "unicode/utypes.h"
-#include "unicode/bytestream.h"
  
  #if !UCONFIG_NO_COLLATION
  
+#include "unicode/bytestream.h"
+#include "unicode/utf16.h"
  #include "bocsu.h"
  
  /*
diff --git a/icu4c/source/i18n/casetrn.cpp b/icu4c/source/i18n/casetrn.cpp

index 5038c51da3d66f74f74414f454fc07a1efb6186e..20a73e342949c67086adf7832972dc8002a568ee 100644 (file)
--- a/icu4c/source/i18n/casetrn.cpp
+++ b/icu4c/source/i18n/casetrn.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2001-2010, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -22,6 +22,8 @@
  
  #include "unicode/uchar.h"
  #include "unicode/ustring.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "tolowtrn.h"
  #include "ucase.h"
  #include "cpputils.h"
diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp

index a9d7d17285ca4e427b5e64846920398b165b2348..edc48dce4ad1248413352bc04b59fb6b4397bd01 100644 (file)
--- a/icu4c/source/i18n/decimfmt.cpp
+++ b/icu4c/source/i18n/decimfmt.cpp
@@ -53,6 +53,7 @@
  #include "unicode/curramt.h"
  #include "unicode/currpinf.h"
  #include "unicode/plurrule.h"
+#include "unicode/utf16.h"
  #include "uresimp.h"
  #include "ucurrimp.h"
  #include "charstr.h"
diff --git a/icu4c/source/i18n/esctrn.cpp b/icu4c/source/i18n/esctrn.cpp

index 0af201afc37a7a5bd4e56662f1ffe0b363bcb18a..85c38cc2d8c9c8484faed975ed3d09b1d1a333c1 100644 (file)
--- a/icu4c/source/i18n/esctrn.cpp
+++ b/icu4c/source/i18n/esctrn.cpp
@@ -12,6 +12,7 @@
  
  #if !UCONFIG_NO_TRANSLITERATION
  
+#include "unicode/utf16.h"
  #include "esctrn.h"
  #include "util.h"
  
@@ -140,7 +141,7 @@ void EscapeTransliterator::handleTransliterate(Replaceable& text,
  
      while (start < limit) {
          int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
-        int32_t charLen = grokSupplementals ? UTF_CHAR_LENGTH(c) : 1;
+        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
  
          if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
              buf.truncate(0);
diff --git a/icu4c/source/i18n/name2uni.cpp b/icu4c/source/i18n/name2uni.cpp

index b66bcfeda1df9ccc8e895302a21fddc80af042c8..f83f96aedd91bbe13c8ec9dda88b6708e709b542 100644 (file)
--- a/icu4c/source/i18n/name2uni.cpp
+++ b/icu4c/source/i18n/name2uni.cpp
@@ -15,6 +15,7 @@
  #include "unicode/unifilt.h"
  #include "unicode/uchar.h"
  #include "unicode/uniset.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "name2uni.h"
  #include "patternprops.h"
@@ -194,7 +195,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
                      if (U_SUCCESS(status)) {
                          // Lookup succeeded
  
-                        // assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1);
+                        // assert(U16_LENGTH(CLOSE_DELIM) == 1);
                          cursor++; // advance over CLOSE_DELIM
  
                          str.truncate(0);
@@ -238,7 +239,7 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
              break;
          }
  
-        cursor += UTF_CHAR_LENGTH(c);
+        cursor += U16_LENGTH(c);
      }
          
      offsets.contextLimit += limit - offsets.limit;
diff --git a/icu4c/source/i18n/nortrans.cpp b/icu4c/source/i18n/nortrans.cpp

index 329fb9105213214cf09a08cc0c2066f518c0ad51..038a6836c3bca2efe13e0f7f7821cc59e46e3097 100644 (file)
--- a/icu4c/source/i18n/nortrans.cpp
+++ b/icu4c/source/i18n/nortrans.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2001-2010, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -13,6 +13,7 @@
  #if !UCONFIG_NO_TRANSLITERATION
  
  #include "unicode/normalizer2.h"
+#include "unicode/utf16.h"
  #include "cstring.h"
  #include "nortrans.h"
  
diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp

index ee59a9243101f5b1c2974acbbdffb8e1376ade67..7e3678b6cfbe943a5b1d7786243d967a4eef08b0 100644 (file)
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@@ -19,6 +19,7 @@
  #include "unicode/uchar.h"
  #include "unicode/ustring.h"
  #include "unicode/uniset.h"
+#include "unicode/utf16.h"
  #include "cstring.h"
  #include "funcrepl.h"
  #include "hash.h"
@@ -793,7 +794,7 @@ void RuleHalf::removeContext() {
  UBool RuleHalf::isValidOutput(TransliteratorParser& transParser) {
      for (int32_t i=0; i<text.length(); ) {
          UChar32 c = text.char32At(i);
-        i += UTF_CHAR_LENGTH(c);
+        i += U16_LENGTH(c);
          if (!transParser.parseData->isReplacer(c)) {
              return FALSE;
          }
@@ -808,7 +809,7 @@ UBool RuleHalf::isValidOutput(TransliteratorParser& transParser) {
  UBool RuleHalf::isValidInput(TransliteratorParser& transParser) {
      for (int32_t i=0; i<text.length(); ) {
          UChar32 c = text.char32At(i);
-        i += UTF_CHAR_LENGTH(c);
+        i += U16_LENGTH(c);
          if (!transParser.parseData->isMatcher(c)) {
              return FALSE;
          }
diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp

index c3c05cad4b5fc1f0938bfc7ba45898c0a5b9ad3a..b21e37c9cea951ce98592da3dbe102c53889a148 100644 (file)
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@@ -15,6 +15,7 @@
  #include "unicode/rep.h"
  #include "unicode/unifilt.h"
  #include "unicode/uniset.h"
+#include "unicode/utf16.h"
  #include "rbt_rule.h"
  #include "rbt_data.h"
  #include "cmemory.h"
@@ -315,13 +316,13 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
  
  static inline int32_t posBefore(const Replaceable& str, int32_t pos) {
      return (pos > 0) ?
-        pos - UTF_CHAR_LENGTH(str.char32At(pos-1)) :
+        pos - U16_LENGTH(str.char32At(pos-1)) :
          pos - 1;
  }
  
  static inline int32_t posAfter(const Replaceable& str, int32_t pos) {
      return (pos >= 0 && pos < str.length()) ?
-        pos + UTF_CHAR_LENGTH(str.char32At(pos)) :
+        pos + U16_LENGTH(str.char32At(pos)) :
          pos + 1;
  }
  
@@ -531,7 +532,7 @@ void TransliterationRule::addSourceSetTo(UnicodeSet& toUnionTo) const {
      int32_t limit = anteContextLength + keyLength;
      for (int32_t i=anteContextLength; i<limit; ) {
          UChar32 ch = pattern.char32At(i);
-        i += UTF_CHAR_LENGTH(ch);
+        i += U16_LENGTH(ch);
          const UnicodeMatcher* matcher = data->lookupMatcher(ch);
          if (matcher == NULL) {
              toUnionTo.add(ch);
diff --git a/icu4c/source/i18n/rbt_set.cpp b/icu4c/source/i18n/rbt_set.cpp

index 4ac1ca12692c6a16ee4166c636b7d1d764b0646d..59c489d6e58996910596ba16ecb5950849064474 100644 (file)
--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@@ -14,6 +14,7 @@
  
  #include "unicode/unistr.h"
  #include "unicode/uniset.h"
+#include "unicode/utf16.h"
  #include "rbt_set.h"
  #include "rbt_rule.h"
  #include "cmemory.h"
@@ -90,7 +91,7 @@ UnicodeString& _escape(const UnicodeString &source,
                         UnicodeString &target) {
      for (int32_t i = 0; i < source.length(); ) {
          UChar32 ch = source.char32At(i);
-        i += UTF_CHAR_LENGTH(ch);
+        i += U16_LENGTH(ch);
          if (ch < 0x09 || (ch > 0x0A && ch < 0x20)|| ch > 0x7E) {
              if (ch <= 0xFFFF) {
                  target += "\\u";
@@ -416,7 +417,7 @@ UBool TransliterationRuleSet::transliterate(Replaceable& text,
          }
      }
      // No match or partial match from any rule
-    pos.start += UTF_CHAR_LENGTH(text.char32At(pos.start));
+    pos.start += U16_LENGTH(text.char32At(pos.start));
      _debugOut("no match", NULL, text, pos);
      return TRUE;
  }
diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp

index 968d08123fd7c186951019ec4ccecc051d2f089e..604e926312c2cbe462b90f66eaa0fdc00dd96de9 100644 (file)
--- a/icu4c/source/i18n/regexcmp.cpp
+++ b/icu4c/source/i18n/regexcmp.cpp
@@ -21,6 +21,8 @@
  #include "unicode/parsepos.h"
  #include "unicode/parseerr.h"
  #include "unicode/regex.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "patternprops.h"
  #include "putilimp.h"
  #include "cmemory.h"
diff --git a/icu4c/source/i18n/regextxt.cpp b/icu4c/source/i18n/regextxt.cpp

index 6960dac1b7488fa94302e1e5610c00079d77a9b0..2156756a49141106a631a052616fc5e07fd8f19d 100644 (file)
--- a/icu4c/source/i18n/regextxt.cpp
+++ b/icu4c/source/i18n/regextxt.cpp
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT:
- * Copyright (c) 2008-2010, International Business Machines Corporation and
+ * Copyright (c) 2008-2011, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  //
@@ -9,6 +9,7 @@
  //  This file contains utility code for supporting UText in the regular expression engine.
  //
  
+#include "unicode/utf.h"
  #include "regextxt.h"
  
  U_NAMESPACE_BEGIN
@@ -28,7 +29,7 @@ uregex_utext_unescape_charAt(int32_t offset, void *ct) {
          c = UTEXT_NEXT32(context->text);
          context->lastOffset = offset;
      }
-    
+
      // !!!: Doesn't handle characters outside BMP
      if (U_IS_BMP(c)) {
          return (UChar)c;
diff --git a/icu4c/source/i18n/rematch.cpp b/icu4c/source/i18n/rematch.cpp

index 7111d5495f0fddec6195b0da8485822eb6857bcf..4bd50ef0e3a1b6c774ba4fc8ac43f1ce6e5778b5 100644 (file)
--- a/icu4c/source/i18n/rematch.cpp
+++ b/icu4c/source/i18n/rematch.cpp
@@ -19,6 +19,8 @@
  #include "unicode/uchar.h"
  #include "unicode/ustring.h"
  #include "unicode/rbbi.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
  #include "uassert.h"
  #include "cmemory.h"
  #include "uvector.h"
diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp

index d1df7eda8bad83eba0b23285ac2ad72f4361ca0d..5a88539a65601394dbdd95046428c856c36a0a30 100644 (file)
--- a/icu4c/source/i18n/smpdtfmt.cpp
+++ b/icu4c/source/i18n/smpdtfmt.cpp
@@ -45,6 +45,7 @@
  #include "unicode/basictz.h"
  #include "unicode/simpletz.h"
  #include "unicode/rbtz.h"
+#include "unicode/utf16.h"
  #include "unicode/vtzone.h"
  #include "olsontz.h"
  #include "patternprops.h"
@@ -2562,7 +2563,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
          if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
              break;
          }
-        start += UTF_CHAR_LENGTH(c);
+        start += U16_LENGTH(c);
      }
      pos.setIndex(start);
  
diff --git a/icu4c/source/i18n/strmatch.cpp b/icu4c/source/i18n/strmatch.cpp

index 242d83a57cfa24878c9e0259e24f9e011424e872..466dc4d89706fd81e26b3b99a9d59c2e87d58257 100644 (file)
--- a/icu4c/source/i18n/strmatch.cpp
+++ b/icu4c/source/i18n/strmatch.cpp
@@ -16,6 +16,7 @@
  #include "rbt_data.h"
  #include "util.h"
  #include "unicode/uniset.h"
+#include "unicode/utf16.h"
  
  U_NAMESPACE_BEGIN
  
@@ -194,7 +195,7 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const {
   */
  void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
      UChar32 ch;
-    for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) {
+    for (int32_t i=0; i<pattern.length(); i+=U16_LENGTH(ch)) {
          ch = pattern.char32At(i);
          const UnicodeMatcher* matcher = data->lookupMatcher(ch);
          if (matcher == NULL) {
@@ -276,8 +277,8 @@ void StringMatcher::setData(const TransliterationRuleData* d) {
          if (f != NULL) {
              f->setData(data);
          }
-        i += UTF_CHAR_LENGTH(c);
-    }    
+        i += U16_LENGTH(c);
+    }
  }
  
  U_NAMESPACE_END
diff --git a/icu4c/source/i18n/strrepl.cpp b/icu4c/source/i18n/strrepl.cpp

index f05c9c90c486dd5d79f462628381e1f56b14638d..7608e288f801ae90efaf874c6dd7573abc55354b 100644 (file)
--- a/icu4c/source/i18n/strrepl.cpp
+++ b/icu4c/source/i18n/strrepl.cpp
@@ -12,10 +12,11 @@
  
  #if !UCONFIG_NO_TRANSLITERATION
  
+#include "unicode/uniset.h"
+#include "unicode/utf16.h"
  #include "strrepl.h"
  #include "rbt_data.h"
  #include "util.h"
-#include "unicode/uniset.h"
  
  U_NAMESPACE_BEGIN
  
@@ -142,7 +143,7 @@ int32_t StringReplacer::replace(Replaceable& text,
          int32_t tempStart = text.length(); // start of temp buffer
          int32_t destStart = tempStart; // copy new text to here
          if (start > 0) {
-            int32_t len = UTF_CHAR_LENGTH(text.char32At(start-1));
+            int32_t len = U16_LENGTH(text.char32At(start-1));
              text.copy(start-len, start, tempStart);
              destStart += len;
          } else {
@@ -176,7 +177,7 @@ int32_t StringReplacer::replace(Replaceable& text,
                  int32_t len = r->replace(text, destLimit, destLimit, cursor);
                  destLimit += len;
              }
-            oOutput += UTF_CHAR_LENGTH(c);
+            oOutput += U16_LENGTH(c);
          }
          // Insert any accumulated straight text.
          if (buf.length() > 0) {
@@ -208,7 +209,7 @@ int32_t StringReplacer::replace(Replaceable& text,
              int32_t n = cursorPos;
              // Outside the output string, cursorPos counts code points
              while (n < 0 && newStart > 0) {
-                newStart -= UTF_CHAR_LENGTH(text.char32At(newStart-1));
+                newStart -= U16_LENGTH(text.char32At(newStart-1));
                  ++n;
              }
              newStart += n;
@@ -217,7 +218,7 @@ int32_t StringReplacer::replace(Replaceable& text,
              int32_t n = cursorPos - output.length();
              // Outside the output string, cursorPos counts code points
              while (n > 0 && newStart < text.length()) {
-                newStart += UTF_CHAR_LENGTH(text.char32At(newStart));
+                newStart += U16_LENGTH(text.char32At(newStart));
                  --n;
              }
              newStart += n;
@@ -292,7 +293,7 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
   */
  void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
      UChar32 ch;
-    for (int32_t i=0; i<output.length(); i+=UTF_CHAR_LENGTH(ch)) {
+    for (int32_t i=0; i<output.length(); i+=U16_LENGTH(ch)) {
      ch = output.char32At(i);
      UnicodeReplacer* r = data->lookupReplacer(ch);
      if (r == NULL) {
@@ -315,7 +316,7 @@ void StringReplacer::setData(const TransliterationRuleData* d) {
          if (f != NULL) {
              f->setData(data);
          }
-        i += UTF_CHAR_LENGTH(c);
+        i += U16_LENGTH(c);
      }
  }
  
diff --git a/icu4c/source/i18n/titletrn.cpp b/icu4c/source/i18n/titletrn.cpp

index 76c17d1bad7ce58da9a633b1a5e8a81cd9ca9d97..fe3983d69f6fa561d08a434995a8448ddd65aa9e 100644 (file)
--- a/icu4c/source/i18n/titletrn.cpp
+++ b/icu4c/source/i18n/titletrn.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2001-2007, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -15,6 +15,7 @@
  #include "unicode/uchar.h"
  #include "unicode/uniset.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "titletrn.h"
  #include "umutex.h"
  #include "ucase.h"
diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp

index b1a1253c1faf7a4fe9c0949e25ef99f00dc66c96..94e58c39f32aadf78c0d8d5b77647caab35bb57d 100644 (file)
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@@ -24,6 +24,7 @@
  #include "unicode/uniset.h"
  #include "unicode/uscript.h"
  #include "unicode/strenum.h"
+#include "unicode/utf16.h"
  #include "cpdtrans.h"
  #include "nultrans.h"
  #include "rbt_data.h"
@@ -368,7 +369,7 @@ void Transliterator::_transliterate(Replaceable& text,
      }
  
      if (index.limit > 0 &&
-        UTF_IS_LEAD(text.charAt(index.limit - 1))) {
+        U16_IS_LEAD(text.charAt(index.limit - 1))) {
          // Oops, there is a dangling lead surrogate in the buffer.
          // This will break most transliterators, since they will
          // assume it is part of a pair.  Don't transliterate until
@@ -407,7 +408,7 @@ void Transliterator::_transliterate(Replaceable& text,
      int32_t n = getMaximumContextLength();
      while (newCS > originalStart && n-- > 0) {
          --newCS;
-        newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1;
+        newCS -= U16_LENGTH(text.char32At(newCS)) - 1;
      }
      index.contextStart = uprv_max(newCS, originalStart);
  #endif
@@ -478,14 +479,14 @@ void Transliterator::filteredTransliterate(Replaceable& text,
              UChar32 c;
              while (index.start < globalLimit &&
                     !filter->contains(c=text.char32At(index.start))) {
-                index.start += UTF_CHAR_LENGTH(c);
+                index.start += U16_LENGTH(c);
              }
  
              // Find the end of this run of unfiltered chars
              index.limit = index.start;
              while (index.limit < globalLimit &&
                     filter->contains(c=text.char32At(index.limit))) {
-                index.limit += UTF_CHAR_LENGTH(c);
+                index.limit += U16_LENGTH(c);
              }
          }
  
@@ -568,8 +569,7 @@ void Transliterator::filteredTransliterate(Replaceable& text,
              // transliterations and commit complete transliterations.
              for (;;) {
                  // Length of additional code point, either one or two
-                int32_t charLength =
-                    UTF_CHAR_LENGTH(text.char32At(passLimit));
+                int32_t charLength = U16_LENGTH(text.char32At(passLimit));
                  passLimit += charLength;
                  if (passLimit > runLimit) {
                      break;
@@ -1144,7 +1144,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
              if (!ICU_Utility::escapeUnprintable(rulesSource, c)) {
                  rulesSource.append(c);
              }
-            i += UTF_CHAR_LENGTH(c);
+            i += U16_LENGTH(c);
          }
      } else {
          rulesSource = getID();
diff --git a/icu4c/source/i18n/ucol_bld.cpp b/icu4c/source/i18n/ucol_bld.cpp

index cf7aed33d7835e970f2fda19cfdeac21e68e7b78..0d506ed2a2f904cb14f57a44ac376bf95d83ba4d 100644 (file)
--- a/icu4c/source/i18n/ucol_bld.cpp
+++ b/icu4c/source/i18n/ucol_bld.cpp
@@ -27,6 +27,7 @@
  #include "unicode/uniset.h"
  #include "unicode/uscript.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "normalizer2impl.h"
  #include "ucol_bld.h"
  #include "ucol_elm.h"
@@ -982,7 +983,7 @@ _processUCACompleteIgnorables(const void *context, UChar32 start, UChar32 limit,
                  el.cPoints = el.uchars;
  
                  el.cSize = 0;
-                UTF_APPEND_CHAR(el.uchars, el.cSize, 1024, start);
+                U16_APPEND_UNSAFE(el.uchars, el.cSize, start);
  
                  el.noOfCEs = 1;
                  el.CEs[0] = 0;
diff --git a/icu4c/source/i18n/ucol_elm.cpp b/icu4c/source/i18n/ucol_elm.cpp

index 0488324b3eefa89b680422b01621ff8f42f77967..ae9269fbde24669e3c0ad46556964bec2c1c78df 100644 (file)
--- a/icu4c/source/i18n/ucol_elm.cpp
+++ b/icu4c/source/i18n/ucol_elm.cpp
@@ -31,6 +31,7 @@
  #include "unicode/unistr.h"
  #include "unicode/ucoleitr.h"
  #include "unicode/normlzr.h"
+#include "unicode/utf16.h"
  #include "normalizer2impl.h"
  #include "ucol_elm.h"
  #include "ucol_tok.h"
@@ -767,7 +768,7 @@ static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
      for (c=0; c<0xffff; c++) {
          fcd = unorm_getFCD16(fcdTrieIndex, c);
          if (fcd >= 0x100 ||               // if the leading combining class(c) > 0 ||
-            (UTF_IS_LEAD(c) && fcd != 0)) {//    c is a leading surrogate with some FCD data
+            (U16_IS_LEAD(c) && fcd != 0)) {//    c is a leading surrogate with some FCD data
              if (buildCMTable) {
                  uint32_t cClass = fcd & 0xff;
                  //uint32_t temp=(cClass<<8)+index[cClass];
@@ -845,7 +846,7 @@ static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
      for (j = 1; j<element->prefixSize; j++) {   /* First add NFD prefix chars to unsafe CP hash table */
          // Unless it is a trail surrogate, which is handled algoritmically and
          // shouldn't take up space in the table.
-        if(!(UTF_IS_TRAIL(element->prefix[j]))) {
+        if(!(U16_IS_TRAIL(element->prefix[j]))) {
              unsafeCPSet(t->unsafeCP, element->prefix[j]);
          }
      }
@@ -868,13 +869,13 @@ static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
  #endif
  
      // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix
-    if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
+    if(!(U16_IS_TRAIL(element->cPoints[0]))) {
          unsafeCPSet(t->unsafeCP, element->cPoints[0]);
      }
  
      // Maybe we need this... To handle prefixes completely in the forward direction...
      //if(element->cSize == 1) {
-    //  if(!(UTF_IS_TRAIL(element->cPoints[0]))) {
+    //  if(!(U16_IS_TRAIL(element->cPoints[0]))) {
      //    ContrEndCPSet(t->contrEndCP, element->cPoints[0]);
      //  }
      //}
@@ -885,12 +886,12 @@ static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
      // Add the last char of the contraction to the contraction-end hash table.
      // unless it is a trail surrogate, which is handled algorithmically and
      // shouldn't be in the table
-    if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
+    if(!(U16_IS_TRAIL(element->cPoints[element->cSize -1]))) {
          ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
      }
  
      // First we need to check if contractions starts with a surrogate
-    UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
+    U16_NEXT(element->cPoints, cpsize, element->cSize, cp);
  
      // If there are any Jamos in the contraction, we should turn on special
      // processing for Jamos
@@ -943,21 +944,21 @@ static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
      contractions->currentTag = CONTRACTION_TAG;
  
      // First we need to check if contractions starts with a surrogate
-    UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp);
+    U16_NEXT(element->cPoints, cpsize, element->cSize, cp);
  
      if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first
          uint32_t j = 0;
          for (j=1; j<element->cSize; j++) {   /* First add contraction chars to unsafe CP hash table */
              // Unless it is a trail surrogate, which is handled algoritmically and 
              // shouldn't take up space in the table.
-            if(!(UTF_IS_TRAIL(element->cPoints[j]))) {
+            if(!(U16_IS_TRAIL(element->cPoints[j]))) {
                  unsafeCPSet(t->unsafeCP, element->cPoints[j]);
              }
          }
          // Add the last char of the contraction to the contraction-end hash table.
          // unless it is a trail surrogate, which is handled algorithmically and 
          // shouldn't be in the table
-        if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) {
+        if(!(U16_IS_TRAIL(element->cPoints[element->cSize -1]))) {
              ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]);
          }
  
@@ -1065,7 +1066,7 @@ static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element,
      uint32_t i = 0;
      if(element->mapCE == 0) {
          for(i = 0; i < element->cSize; i++) {
-            if(!UTF_IS_TRAIL(element->cPoints[i])) {
+            if(!U16_IS_TRAIL(element->cPoints[i])) {
                  unsafeCPSet(t->unsafeCP, element->cPoints[i]);
              }
          }
@@ -1074,7 +1075,7 @@ static uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element,
          uint32_t i = 0;
          UChar32 cp;
  
-        UTF_NEXT_CHAR(element->cPoints, i, element->cSize, cp);
+        U16_NEXT(element->cPoints, i, element->cSize, cp);
          /*CE = ucmpe32_get(t->mapping, cp);*/
          CE = utrie_get32(t->mapping, cp, NULL);
  
@@ -1286,7 +1287,7 @@ uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status)
      // We need to use the canonical iterator here
      // the way we do it is to generate the canonically equivalent strings 
      // for the contraction and then add the sequences that pass FCD check
-    if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
+    if(element->cSize > 1 && !(element->cSize==2 && U16_IS_LEAD(element->cPoints[0]) && U16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included
          UnicodeString source(element->cPoints, element->cSize);
          CanonicalIterator it(source, *status);
          source = it.next();
@@ -1406,7 +1407,7 @@ UBool enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t valu
      if(start<0x10000) {
          fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value);
      } else {
-        fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value);
+        fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, U16_LEAD(start), U16_TRAIL(start), limit, U16_LEAD(limit), U16_TRAIL(limit), value);
      }
      return TRUE;
  }
diff --git a/icu4c/source/i18n/ucol_imp.h b/icu4c/source/i18n/ucol_imp.h

index 099a30de5e90b3172006608b658dd5629821b586..e82badfbf500281a9cf1e1a9166a8af837856fc7 100644 (file)
--- a/icu4c/source/i18n/ucol_imp.h
+++ b/icu4c/source/i18n/ucol_imp.h
@@ -27,6 +27,9 @@
  #define UCOL_IMP_H
  
  #include "unicode/utypes.h"
+#ifdef __cplusplus
+#   include "unicode/utf16.h"
+#endif
  
  #define UCA_DATA_TYPE "icu"
  #define UCA_DATA_NAME "ucadata"
@@ -1104,7 +1107,7 @@ static inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
  
      hash = c;
      if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
-        if(UTF_IS_SURROGATE(c)) {
+        if(U16_IS_SURROGATE(c)) {
              /*  Lead or trail surrogate             */
              /*  These are always considered unsafe. */
              return TRUE;
diff --git a/icu4c/source/i18n/unesctrn.cpp b/icu4c/source/i18n/unesctrn.cpp

index c3d848d06e9b45e56e3ccc06cc95e688fb8c6d6f..c950362c413a7e2be4a000d31571c49d4c3e8649 100644 (file)
--- a/icu4c/source/i18n/unesctrn.cpp
+++ b/icu4c/source/i18n/unesctrn.cpp
@@ -1,6 +1,6 @@
  /*
   **********************************************************************
- *   Copyright (c) 2001-2008, International Business Machines
+ *   Copyright (c) 2001-2011, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   **********************************************************************
   *   Date        Name        Description
@@ -13,6 +13,7 @@
  #if !UCONFIG_NO_TRANSLITERATION
  
  #include "unicode/uchar.h"
+#include "unicode/utf16.h"
  #include "unesctrn.h"
  #include "util.h"
  
@@ -229,7 +230,7 @@ void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPositi
                      if (digit < 0) {
                          break;
                      }
-                    s += UTF_CHAR_LENGTH(ch);
+                    s += U16_LENGTH(ch);
                      u = (u * radix) + digit;
                      if (++digitCount == maxDigits) {
                          break;
@@ -273,7 +274,7 @@ void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPositi
          }
  
          if (start < limit) {
-            start += UTF_CHAR_LENGTH(text.char32At(start));
+            start += U16_LENGTH(text.char32At(start));
          }
      }
  
diff --git a/icu4c/source/i18n/uni2name.cpp b/icu4c/source/i18n/uni2name.cpp

index 5eba79daceb6f96490d01197563e1179a575f1a0..4a1defd0582bc746b0dddbc615210aa2643673c5 100644 (file)
--- a/icu4c/source/i18n/uni2name.cpp
+++ b/icu4c/source/i18n/uni2name.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2001-2007, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -14,6 +14,7 @@
  
  #include "unicode/unifilt.h"
  #include "unicode/uchar.h"
+#include "unicode/utf16.h"
  #include "uni2name.h"
  #include "cstring.h"
  #include "cmemory.h"
@@ -94,7 +95,7 @@ void UnicodeNameTransliterator::handleTransliterate(Replaceable& text, UTransPos
  
      while (cursor < limit) {
          UChar32 c = text.char32At(cursor);
-        int32_t clen = UTF_CHAR_LENGTH(c);
+        int32_t clen = U16_LENGTH(c);
          status = U_ZERO_ERROR;
          if ((len = u_charName(c, U_EXTENDED_CHAR_NAME, buf, maxLen, &status)) >0 && !U_FAILURE(status)) {
              str.truncate(OPEN_DELIM_LEN);
diff --git a/icu4c/source/i18n/uregex.cpp b/icu4c/source/i18n/uregex.cpp

index 4dd50f10f11e9b66e034ff06271bfceff27948c6..f096ac126a7f38d4f75b11f97eb1cf5497572e55 100644 (file)
--- a/icu4c/source/i18n/uregex.cpp
+++ b/icu4c/source/i18n/uregex.cpp
@@ -16,6 +16,7 @@
  #include "unicode/ustring.h"
  #include "unicode/uchar.h"
  #include "unicode/uobject.h"
+#include "unicode/utf16.h"
  #include "umutex.h"
  #include "uassert.h"
  #include "cmemory.h"
diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp

index 7078409fc2f12093de79838a8b7396a43adab60d..ee53f08531f2b1f7491737995646c0f37110c1d3 100644 (file)
--- a/icu4c/source/i18n/usearch.cpp
+++ b/icu4c/source/i18n/usearch.cpp
@@ -14,6 +14,7 @@
  #include "unicode/usearch.h"
  #include "unicode/ustring.h"
  #include "unicode/uchar.h"
+#include "unicode/utf16.h"
  #include "normalizer2impl.h"
  #include "ucol_imp.h"
  #include "usrchimp.h"
@@ -459,7 +460,7 @@ inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
          pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
                                                           SECOND_LAST_BYTE_SHIFT_;
          index = length;
-        UTF_BACK_1(patterntext, 0, index);
+        U16_BACK_1(patterntext, 0, index);
          pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
                                                                   LAST_BYTE_MASK_;
      }
@@ -717,7 +718,7 @@ inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
          textoffset < textlength) {
                int32_t  temp       = textoffset;
          const UChar       *text       = strsrch->search->text;
-        UTF_BACK_1(text, 0, temp);
+        U16_BACK_1(text, 0, temp);
          if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
              return getNextBaseOffset(text, textoffset, textlength);
          }
@@ -847,7 +848,7 @@ UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
                int32_t  offset = 0;
          const UChar       *text   = strsrch->search->text + start;
  
-        UTF_FWD_1(text, offset, length);
+        U16_FWD_1(text, offset, length);
          // we are only concerned with the first composite character
          if (unorm_quickCheck(text, offset, UNORM_NFD, status) == UNORM_NO) {
              int32_t safeoffset = getNextSafeOffset(strsrch->collator,
@@ -893,7 +894,7 @@ UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
                  ce = ucol_next(coleiter, status);
              }
              UChar32 codepoint;
-            UTF_PREV_CHAR(norm, 0, offset, codepoint);
+            U16_PREV(norm, 0, offset, codepoint);
              result = !ignorable && (u_getCombiningClass(codepoint) != 0);
  
              if (norm != buffer) {
@@ -975,7 +976,7 @@ UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
          }
          if (start > 0) {
              temp = start;
-            UTF_BACK_1(strsrch->search->text, 0, temp);
+            U16_BACK_1(strsrch->search->text, 0, temp);
              if (getFCD(strsrch->search->text, &temp,
                         strsrch->search->textLength) & LAST_BYTE_MASK_) {
                  setColEIterOffset(coleiter, start);
@@ -1015,7 +1016,7 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
          const UChar       *text       = strsrch->search->text;
                int32_t  temp       = end;
                int32_t      textlength = strsrch->search->textLength;
-        UTF_BACK_1(text, 0, temp);
+        U16_BACK_1(text, 0, temp);
          if (getFCD(text, &temp, textlength) & LAST_BYTE_MASK_) {
              int32_t             firstce  = strsrch->pattern.CE[0];
              UCollationElements *coleiter = strsrch->textIter;
@@ -1304,7 +1305,7 @@ inline int32_t getPreviousBaseOffset(const UChar       *text,
      if (textoffset > 0) {
          for (;;) {
              int32_t result = textoffset;
-            UTF_BACK_1(text, 0, textoffset);
+            U16_BACK_1(text, 0, textoffset);
              int32_t temp = textoffset;
              uint16_t fcd = getFCD(text, &temp, result);
              if ((fcd >> SECOND_LAST_BYTE_SHIFT_) == 0) {
@@ -1338,7 +1339,7 @@ inline int getUnblockedAccentIndex(UChar *accents, int32_t *accentsindex)
      int32_t temp;
      while (index < length) {
          temp = index;
-        UTF_NEXT_CHAR(accents, index, length, codepoint);
+        U16_NEXT(accents, index, length, codepoint);
          if (u_getCombiningClass(codepoint) != cclass) {
              cclass        = u_getCombiningClass(codepoint);
              accentsindex[result] = temp;
@@ -1722,7 +1723,7 @@ UBool doNextCanonicalMatch(UStringSearch *strsrch,
  {
      const UChar       *text = strsrch->search->text;
            int32_t  temp = textoffset;
-    UTF_BACK_1(text, 0, temp);
+    U16_BACK_1(text, 0, temp);
      if ((getFCD(text, &temp, textoffset) & LAST_BYTE_MASK_) == 0) {
          UCollationElements *coleiter = strsrch->textIter;
          int32_t         offset   = getColElemIterOffset(coleiter, FALSE);
@@ -2164,7 +2165,7 @@ int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
      const UChar       *text       = strsrch->search->text;
            int32_t  tempend    = end;
  
-    UTF_BACK_1(text, 0, tempend);
+    U16_BACK_1(text, 0, tempend);
      if (!(getFCD(text, &tempend, strsrch->search->textLength) &
                                                             LAST_BYTE_MASK_)) {
          // die... failed at a base character
@@ -2513,7 +2514,7 @@ UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
                  // accents may have extra starting ces, this occurs when a
                  // pure accent pattern is matched without rearrangement
                  int32_t    expected = patternce[patterncelength - 1];
-                UTF_BACK_1(text, 0, *end);
+                U16_BACK_1(text, 0, *end);
                  if (getFCD(text, end, textlength) & LAST_BYTE_MASK_) {
                      ce = getCE(strsrch, ucol_previous(coleiter, status));
                      while (U_SUCCESS(*status) && ce != expected &&
@@ -3227,7 +3228,7 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
                      search->matchedIndex = offset;
                  }
                  else { // moves by codepoints
-                    UTF_FWD_1(search->text, search->matchedIndex, textlength);
+                    U16_FWD_1(search->text, search->matchedIndex, textlength);
                  }
  
                  search->matchedLength = 0;
@@ -3341,7 +3342,7 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
                      // status checked below
                  }
                  else { // move by codepoints
-                    UTF_BACK_1(search->text, 0, search->matchedIndex);
+                    U16_BACK_1(search->text, 0, search->matchedIndex);
                      setColEIterOffset(strsrch->textIter, search->matchedIndex);
                      // status checked below
                      search->matchedLength = 0;
diff --git a/icu4c/source/i18n/uspoof.cpp b/icu4c/source/i18n/uspoof.cpp

index 908cafa0105d33881f6fdf6efee6ee0285dcdf2f..72bb7edae4139009381d8d5b18fb3f81226beb9f 100644 (file)
--- a/icu4c/source/i18n/uspoof.cpp
+++ b/icu4c/source/i18n/uspoof.cpp
@@ -17,6 +17,7 @@
  #include "unicode/uspoof.h"
  #include "unicode/unorm.h"
  #include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "uspoof_impl.h"
  #include "uassert.h"
diff --git a/icu4c/source/i18n/uspoof_impl.cpp b/icu4c/source/i18n/uspoof_impl.cpp

index e43072273d53f99c706642df3afe5a364ddaaabe..891b3e7b9941470114d267e4f1bca26a4298e941 100644 (file)
--- a/icu4c/source/i18n/uspoof_impl.cpp
+++ b/icu4c/source/i18n/uspoof_impl.cpp
@@ -10,6 +10,7 @@
  #include "unicode/unorm.h"
  #include "unicode/uchar.h"
  #include "unicode/uniset.h"
+#include "unicode/utf16.h"
  #include "utrie2.h"
  #include "cmemory.h"
  #include "cstring.h"
diff --git a/icu4c/source/io/ufmt_cmn.h b/icu4c/source/io/ufmt_cmn.h

index 29a4a07fc61426b64d565911dfb946f95ff33756..f712bb3e1fb87623826ac03f9db68115c595dff2 100644 (file)
--- a/icu4c/source/io/ufmt_cmn.h
+++ b/icu4c/source/io/ufmt_cmn.h
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 1998-2004, International Business Machines
+*   Copyright (C) 1998-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -21,10 +21,11 @@
  #define UFMT_CMN_H
  
  #include "unicode/utypes.h"
+#include "unicode/utf16.h"
  
  #define UFMT_DEFAULT_BUFFER_SIZE 128
-#define MAX_UCHAR_BUFFER_SIZE(buffer) (sizeof(buffer)/(UTF_MAX_CHAR_LENGTH*sizeof(UChar)))
-#define MAX_UCHAR_BUFFER_NEEDED(strLen) ((strLen+1)*UTF_MAX_CHAR_LENGTH*sizeof(UChar))
+#define MAX_UCHAR_BUFFER_SIZE(buffer) (sizeof(buffer)/(U16_MAX_LENGTH*sizeof(UChar)))
+#define MAX_UCHAR_BUFFER_NEEDED(strLen) ((strLen+1)*U16_MAX_LENGTH*sizeof(UChar))
  
  /** 
   * Enum representing the possible argument types for uprintf/uscanf
diff --git a/icu4c/source/io/uprntf_p.c b/icu4c/source/io/uprntf_p.c

index f303c0c8d6c2d383f47369b5ec878cbce1844062..78c2e5da7595bcf6f6bfb8e034562165d63ef4be 100644 (file)
--- a/icu4c/source/io/uprntf_p.c
+++ b/icu4c/source/io/uprntf_p.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 1998-2008, International Business Machines
+*   Copyright (C) 1998-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -22,7 +22,7 @@
  #if !UCONFIG_NO_FORMATTING
  
  #include "unicode/ustring.h"
-
+#include "unicode/utf16.h"
  #include "uprintf.h"
  #include "ufmt_cmn.h"
  #include "cmemory.h"
@@ -246,7 +246,7 @@ u_printf_char_handler(const u_printf_stream_handler  *handler,
                        const u_printf_spec_info       *info,
                        const ufmt_args                *args)
  {
-    UChar s[UTF_MAX_CHAR_LENGTH+1];
+    UChar s[U16_MAX_LENGTH+1];
      int32_t len = 1, written;
      unsigned char arg = (unsigned char)(args[0].int64Value);
  
diff --git a/icu4c/source/io/ustream.cpp b/icu4c/source/io/ustream.cpp

index b3d0af41e4b67eafd845e1bbd8566b90aaa3e73f..fb5f8880150ddef11a95295a568519f0221b22d4 100644 (file)
--- a/icu4c/source/io/ustream.cpp
+++ b/icu4c/source/io/ustream.cpp
@@ -17,6 +17,7 @@
  #include "unicode/ustream.h"
  #include "unicode/ucnv.h"
  #include "unicode/uchar.h"
+#include "unicode/utf16.h"
  #include "ustr_cnv.h"
  #include "cmemory.h"
  #include <string.h>
diff --git a/icu4c/source/test/cintltst/bocu1tst.c b/icu4c/source/test/cintltst/bocu1tst.c

index 47b29e209dcee589f53c7e960b60eef6da04b237..9b0befc97101c54a9e12ac15306e607d8826c0be 100644 (file)
--- a/icu4c/source/test/cintltst/bocu1tst.c
+++ b/icu4c/source/test/cintltst/bocu1tst.c
@@ -31,6 +31,7 @@
  #include "unicode/utypes.h"
  #include "unicode/ustring.h"
  #include "unicode/ucnv.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "cintltst.h"
  
@@ -715,7 +716,7 @@ writeString(const UChar *s, int32_t length, uint8_t *p) {
      p0=p;
      i=0;
      while(i<length) {
-        UTF_NEXT_CHAR(s, i, length, c);
+        U16_NEXT(s, i, length, c);
          p+=writePacked(encodeBocu1(&prev, c), p);
      }
      return (int32_t)(p-p0);
@@ -743,7 +744,7 @@ readString(const uint8_t *p, int32_t length, UChar *s) {
              return -1;
          }
          if(c>=0) {
-            UTF_APPEND_CHAR_UNSAFE(s, sLength, c);
+            U16_APPEND_UNSAFE(s, sLength, c);
          }
      }
      return sLength;
diff --git a/icu4c/source/test/cintltst/cmsccoll.c b/icu4c/source/test/cintltst/cmsccoll.c

index 948e1ca3d533e81eff3cce821ac6f179710d251d..6ff0e0a9475c3de9c1d6a8e82bbebb6ae18334b5 100644 (file)
--- a/icu4c/source/test/cintltst/cmsccoll.c
+++ b/icu4c/source/test/cintltst/cmsccoll.c
@@ -37,6 +37,7 @@
  #include "unicode/ucnv.h"
  #include "unicode/ures.h"
  #include "unicode/uscript.h"
+#include "unicode/utf16.h"
  #include "uparse.h"
  #include "putilimp.h"
  
@@ -1678,7 +1679,7 @@ static void TestComposeDecompose(void) {
      for(u = 0; u < charsToTestSize; u++) {
          UChar32 ch = uset_charAt(charsToTest, u);
          len = 0;
-        UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
+        U16_APPEND_UNSAFE(comp, len, ch);
          nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
          nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
  
@@ -1712,7 +1713,7 @@ static void TestComposeDecompose(void) {
        uprv_memset(t[noCases], 0, sizeof(tester));
        t[noCases]->u = u;
        len = 0;
-      UTF_APPEND_CHAR_UNSAFE(comp, len, u);
+      U16_APPEND_UNSAFE(comp, len, u);
        comp[len] = 0;
        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
diff --git a/icu4c/source/test/cintltst/cnormtst.c b/icu4c/source/test/cintltst/cnormtst.c

index b47a9ef39d7621804005317d837d9f5ae04520ce..2999152230829f7c0967b3f7a547156b7fa9a6b8 100644 (file)
--- a/icu4c/source/test/cintltst/cnormtst.c
+++ b/icu4c/source/test/cintltst/cnormtst.c
@@ -16,6 +16,7 @@
  /*tests for u_normalization*/
  #include "unicode/utypes.h"
  #include "unicode/unorm.h"
+#include "unicode/utf16.h"
  #include "cintltst.h"
  
  #if UCONFIG_NO_NORMALIZATION
@@ -811,13 +812,13 @@ TestNormCoverage() {
  
      hangulPrefixLength=inLength;
  
-    input[inLength++]=UTF16_LEAD(MUSICAL_HALF_NOTE);
-    input[inLength++]=UTF16_TRAIL(MUSICAL_HALF_NOTE);
+    input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
+    input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
      for(i=0; i<200; ++i) {
-        input[inLength++]=UTF16_LEAD(MUSICAL_STACCATO);
-        input[inLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
-        input[inLength++]=UTF16_LEAD(MUSICAL_STEM);
-        input[inLength++]=UTF16_TRAIL(MUSICAL_STEM);
+        input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
+        input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
+        input[inLength++]=U16_LEAD(MUSICAL_STEM);
+        input[inLength++]=U16_TRAIL(MUSICAL_STEM);
      }
  
      /* (compatibility) Jamo L, T do not compose */
@@ -870,17 +871,17 @@ TestNormCoverage() {
  
      expect[expectLength++]=HANGUL_AC00+14*28;
  
-    expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
-    expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
-    expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
-    expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
+    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
+    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
+    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
+    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
      for(i=0; i<200; ++i) {
-        expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
-        expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
+        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
+        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
      }
      for(i=0; i<200; ++i) {
-        expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
-        expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
+        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
+        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
      }
  
      expect[expectLength++]=HANGUL_KIYEOK;
@@ -921,17 +922,17 @@ TestNormCoverage() {
      u_memcpy(expect, input, hangulPrefixLength);
      expectLength=hangulPrefixLength;
  
-    expect[expectLength++]=UTF16_LEAD(MUSICAL_VOID_NOTEHEAD);
-    expect[expectLength++]=UTF16_TRAIL(MUSICAL_VOID_NOTEHEAD);
-    expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
-    expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
+    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
+    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
+    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
+    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
      for(i=0; i<200; ++i) {
-        expect[expectLength++]=UTF16_LEAD(MUSICAL_STEM);
-        expect[expectLength++]=UTF16_TRAIL(MUSICAL_STEM);
+        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
+        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
      }
      for(i=0; i<200; ++i) {
-        expect[expectLength++]=UTF16_LEAD(MUSICAL_STACCATO);
-        expect[expectLength++]=UTF16_TRAIL(MUSICAL_STACCATO);
+        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
+        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
      }
  
      expect[expectLength++]=HANGUL_K_KIYEOK;
diff --git a/icu4c/source/test/cintltst/cucdtst.c b/icu4c/source/test/cintltst/cucdtst.c

index fc1ba351ef7f2f1728987fcccdd35d561513aa74..cfdb05cb3d84dece54eb73680eb0180cfc07107d 100644 (file)
--- a/icu4c/source/test/cintltst/cucdtst.c
+++ b/icu4c/source/test/cintltst/cucdtst.c
@@ -1439,7 +1439,7 @@ static void TestCharLength()
      for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
          UChar32 c=codepoint[i+1];
          if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
-            log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
+            log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
          }
          multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
          if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
diff --git a/icu4c/source/test/cintltst/nccbtst.c b/icu4c/source/test/cintltst/nccbtst.c

index 5fa304522636af4aaca5cb52dbae730bd502b0ae..f4e3c37d71f72ead0b9315d27ae4d1113ac6379c 100644 (file)
--- a/icu4c/source/test/cintltst/nccbtst.c
+++ b/icu4c/source/test/cintltst/nccbtst.c
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT: 
- * Copyright (c) 1997-2010, International Business Machines Corporation and
+ * Copyright (c) 1997-2011, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  /*
@@ -25,6 +25,8 @@
  #include "unicode/ustring.h"
  #include "nccbtst.h"
  #include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+
  #define NEW_MAX_BUFFER 999
  
  #define nct_min(x,y)  ((x<y) ? x : y)
@@ -1736,10 +1738,10 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
              in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
  
          static const UChar
-            out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff },
-            out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe },
-            out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd, 0xfffd },
-            out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 };
+            out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
+            out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
+            out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
+            out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
  
          static const int32_t
              offsets1[]={ 4, 4, 8 },
diff --git a/icu4c/source/test/cintltst/nucnvtst.c b/icu4c/source/test/cintltst/nucnvtst.c

index 3265f4a92a2dec2a5aff4f151422a6655fe1e3b2..5742de0318d527bf965fe6666faa513a5ad4e22d 100644 (file)
--- a/icu4c/source/test/cintltst/nucnvtst.c
+++ b/icu4c/source/test/cintltst/nucnvtst.c
@@ -22,6 +22,7 @@
  #include "unicode/utypes.h"
  #include "unicode/ustring.h"
  #include "unicode/ucol.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "nucnvtst.h"
  
@@ -2972,9 +2973,9 @@ TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
              log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
              break;
          } else {
-            if(UTF_IS_FIRST_SURROGATE(*r)){
+            if(U16_IS_LEAD(*r)){
                  int i =0, len = 2;
-                UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
+                U16_NEXT(r, i, len, exC);
                  r++;
              }else{
                  exC = *r;
@@ -3476,9 +3477,9 @@ unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *s
          }
          if(dstIndex < dstLen){
              if(c>0xFFFF){
-               dst[dstIndex++] = UTF16_LEAD(c);
+               dst[dstIndex++] = U16_LEAD(c);
                 if(dstIndex<dstLen){
-                    dst[dstIndex]=UTF16_TRAIL(c);
+                    dst[dstIndex]=U16_TRAIL(c);
                 }else{
                     *status=U_BUFFER_OVERFLOW_ERROR;
                 }
@@ -3516,8 +3517,8 @@ TestFullRoundtrip(const char* cp){
              usource[0] =(UChar) i;
              len=1;
          }else{
-            usource[0]=UTF16_LEAD(i);
-            usource[1]=UTF16_TRAIL(i);
+            usource[0]=U16_LEAD(i);
+            usource[1]=U16_TRAIL(i);
              len=2;
          }
          ulen=len;
diff --git a/icu4c/source/test/cintltst/sprpdata.c b/icu4c/source/test/cintltst/sprpdata.c

index ff2f9bd8cb73ea4eab94727b3cc72e4e5112a143..4865d67e64c605edbea2b11360d2d3741862ac2b 100644 (file)
--- a/icu4c/source/test/cintltst/sprpdata.c
+++ b/icu4c/source/test/cintltst/sprpdata.c
@@ -23,6 +23,7 @@
  #include "unicode/putil.h"
  #include "cintltst.h"
  #include "unicode/usprep.h"
+#include "unicode/utf16.h"
  #include "sprpimpl.h"
  #include "uparse.h"
  #include "cmemory.h"
@@ -206,8 +207,8 @@ compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping,i
                      log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
                  }
              }else{
-                UChar lead  = UTF16_LEAD(mapping[i]);
-                UChar trail = UTF16_TRAIL(mapping[i]);
+                UChar lead  = U16_LEAD(mapping[i]);
+                UChar trail = U16_TRAIL(mapping[i]);
                  if(mappingData[index+i] != lead ||
                      mappingData[index+i+1] != trail){
                      log_err( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X\n", lead, trail, mappingData[index+i], mappingData[index+i+1]);
@@ -234,7 +235,7 @@ compareFlagsForRange(UStringPrepProfile* data,
      UTrie trie = data->sprepTrie;
  /*
      // supplementary code point
-    UChar __lead16=UTF16_LEAD(0x2323E);
+    UChar __lead16=U16_LEAD(0x2323E);
      int32_t __offset;
  
      // get data for lead surrogate
diff --git a/icu4c/source/test/cintltst/trietest.c b/icu4c/source/test/cintltst/trietest.c

index 5d3b9491c5ee1abb1e516b5757f9a3323927dc1d..ac595b675764516d11649b90ccc1377cd120cc82 100644 (file)
--- a/icu4c/source/test/cintltst/trietest.c
+++ b/icu4c/source/test/cintltst/trietest.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2001-2008, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -16,6 +16,7 @@
  
  #include <stdio.h>
  #include "unicode/utypes.h"
+#include "unicode/utf16.h"
  #include "utrie.h"
  #include "cstring.h"
  #include "cmemory.h"
@@ -156,7 +157,7 @@ testTrieIteration(const char *testName,
          c=checkRanges[i].limit;
          if(c!=0) {
              --c;
-            UTF_APPEND_CHAR_UNSAFE(s, length, c);
+            U16_APPEND_UNSAFE(s, length, c);
              values[countValues++]=checkRanges[i].value;
          }
      }
@@ -179,7 +180,7 @@ testTrieIteration(const char *testName,
          if(
              c2==0 ?
                  c!=*(p-1) :
-                !UTF_IS_LEAD(c) || !UTF_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
+                !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*(p-2) || c2!=*(p-1)
          ) {
              log_err("error: wrong (c, c2) from UTRIE_NEXT(%s): (U+%04lx, U+%04lx)\n",
                      testName, c, c2);
@@ -243,7 +244,7 @@ testTrieIteration(const char *testName,
          if(
              c2==0 ?
                  c!=*p:
-                !UTF_IS_LEAD(c) || !UTF_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
+                !U16_IS_LEAD(c) || !U16_IS_TRAIL(c2) || c!=*p || c2!=*(p+1)
          ) {
              log_err("error: wrong (c, c2) from UTRIE_PREVIOUS(%s): (U+%04lx, U+%04lx)\n",
                      testName, c, c2);
@@ -384,7 +385,7 @@ testTrieRangesWithMalloc(const char *testName,
                      log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                              testName, start, value2, value);
                  }
-                if(!UTF_IS_LEAD(start)) {
+                if(!U16_IS_LEAD(start)) {
                      if(dataIs32) {
                          value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                      } else {
@@ -593,7 +594,7 @@ testTrieRanges(const char *testName,
                      log_err("error: unserialized trie(%s).fromBMP(U+%04lx)==0x%lx instead of 0x%lx\n",
                              testName, start, value2, value);
                  }
-                if(!UTF_IS_LEAD(start)) {
+                if(!U16_IS_LEAD(start)) {
                      if(dataIs32) {
                          value2=UTRIE_GET32_FROM_LEAD(&trie, start);
                      } else {
diff --git a/icu4c/source/test/intltest/citrtest.cpp b/icu4c/source/test/intltest/citrtest.cpp

index 8f2059fb7c972ed5fc044c02d4ebed6f11ca7f63..be8803d626856486eed6fa13b15dad6418234276 100644 (file)
--- a/icu4c/source/test/intltest/citrtest.cpp
+++ b/icu4c/source/test/intltest/citrtest.cpp
@@ -1,6 +1,6 @@
  /****************************************************************************************
   * COPYRIGHT: 
- * Copyright (c) 1997-2010, International Business Machines Corporation and
+ * Copyright (c) 1997-2011, International Business Machines Corporation and
   * others. All Rights Reserved.
   * Modification History:
   *
@@ -18,6 +18,7 @@
  #include "unicode/uchriter.h"
  #include "unicode/uiter.h"
  #include "unicode/putil.h"
+#include "unicode/utf16.h"
  #include "citrtest.h"
  
  
@@ -102,20 +103,20 @@ public:
          case kStart:
              pos = begin;
              if(delta > 0) {
-                UTF_FWD_N(text, pos, end, delta);
+                U16_FWD_N(text, pos, end, delta);
              }
              break;
          case kCurrent:
              if(delta > 0) {
-                UTF_FWD_N(text, pos, end, delta);
+                U16_FWD_N(text, pos, end, delta);
              } else {
-                UTF_BACK_N(text, begin, pos, -delta);
+                U16_BACK_N(text, begin, pos, -delta);
              }
              break;
          case kEnd:
              pos = end;
              if(delta < 0) {
-                UTF_BACK_N(text, begin, pos, -delta);
+                U16_BACK_N(text, begin, pos, -delta);
              }
              break;
          default:
@@ -666,7 +667,7 @@ void CharIterTest::TestIterationUChar32() {
          c=iter.first32PostInc();
          if(c != text.char32At(i))
              errln("first32PostInc failed.  Expected->%X Got->%X", text.char32At(i), c);
-        if(iter.getIndex() != UTF16_CHAR_LENGTH(c) + i)
+        if(iter.getIndex() != U16_LENGTH(c) + i)
              errln((UnicodeString)"getIndex() after first32PostInc() failed");
  
          iter.setToStart();
diff --git a/icu4c/source/test/intltest/testidn.cpp b/icu4c/source/test/intltest/testidn.cpp

index 919acde7762094399afce4f0ac2f5901337d5153..8eeb716ad12bfc74d143edcd8ee0627b16454387 100644 (file)
--- a/icu4c/source/test/intltest/testidn.cpp
+++ b/icu4c/source/test/intltest/testidn.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2003-2009, International Business Machines
+*   Copyright (C) 2003-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -30,6 +30,7 @@
  #include "cmemory.h"
  #include "cstring.h"
  #include "unicode/udata.h"
+#include "unicode/utf16.h"
  #include "unewdata.h"
  #include "uoptions.h"
  #include "uparse.h"
@@ -381,8 +382,8 @@ compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
                      pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
                  }
              }else{
-                UChar lead  = UTF16_LEAD(mapping[i]);
-                UChar trail = UTF16_TRAIL(mapping[i]);
+                UChar lead  = U16_LEAD(mapping[i]);
+                UChar trail = U16_TRAIL(mapping[i]);
                  if(mappingData[index+i] != lead ||
                      mappingData[index+i+1] != trail){
                      pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
@@ -405,9 +406,9 @@ compareFlagsForRange(uint32_t start, uint32_t end,
      UStringPrepType retType;
      UBool isIndex=FALSE;
      int32_t value=0;
-/*        
+/*
      // supplementary code point 
-    UChar __lead16=UTF16_LEAD(0x2323E);
+    UChar __lead16=U16_LEAD(0x2323E);
      int32_t __offset;
  
      // get data for lead surrogate 
diff --git a/icu4c/source/test/intltest/transrt.cpp b/icu4c/source/test/intltest/transrt.cpp

index fc3a22f5d28f7437b56e5a5a8ffb496e7c0463f9..beaeae779e9f2fb85cd0cb9fef2c5875ae2160e6 100644 (file)
--- a/icu4c/source/test/intltest/transrt.cpp
+++ b/icu4c/source/test/intltest/transrt.cpp
@@ -26,6 +26,7 @@
  #include "unicode/locid.h"
  #include "unicode/ulocdata.h"
  #include "unicode/utf8.h"
+#include "unicode/utf16.h"
  #include "putilimp.h"
  #include "cmemory.h"
  #include "transrt.h"
@@ -439,7 +440,7 @@ UBool RTTest::isCamel(const UnicodeString& a) {
      // see if string is of the form aB; e.g. lower, then upper or title
      UChar32 cp;
      UBool haveLower = FALSE;
-    for (int32_t i = 0; i < a.length(); i += UTF_CHAR_LENGTH(cp)) {
+    for (int32_t i = 0; i < a.length(); i += U16_LENGTH(cp)) {
          cp = a.char32At(i);
          int8_t t = u_charType(cp);
          switch (t) {
diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp

index 929b4b73b4a931e3d749c9908cc63aef0156cf74..8ea0f1515819c4ece464e24c7a481baafd0d3b4f 100644 (file)
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@@ -23,6 +23,7 @@
  #include "unicode/ustring.h"
  #include "unicode/usetiter.h"
  #include "unicode/uscript.h"
+#include "unicode/utf16.h"
  #include "cpdtrans.h"
  #include "nultrans.h"
  #include "rbt.h"
@@ -3460,7 +3461,7 @@ void TransliteratorTest::TestSurrogateCasing (void) {
      char buffer[20];
      UChar buffer2[20];
      UChar32 dee;
-    UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
+    U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
      UnicodeString DEE(u_totitle(dee));
      if (DEE != DESERET_DEE) {
          err("Fails titlecase of surrogates");
diff --git a/icu4c/source/test/intltest/tstnorm.cpp b/icu4c/source/test/intltest/tstnorm.cpp

index d0aeaac746eb228802866191c45c7102735bef70..1e9f085fca1a0a2729ebbe0e240b7f3d90e07c0d 100644 (file)
--- a/icu4c/source/test/intltest/tstnorm.cpp
+++ b/icu4c/source/test/intltest/tstnorm.cpp
@@ -14,6 +14,7 @@
  #include "unicode/uniset.h"
  #include "unicode/usetiter.h"
  #include "unicode/schriter.h"
+#include "unicode/utf16.h"
  #include "cstring.h"
  #include "normalizer2impl.h"
  #include "tstnorm.h"
@@ -688,8 +689,8 @@ void
  BasicNormalizerTest::TestPreviousNext() {
      // src and expect strings
      static const UChar src[]={
-        UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999),
-        UTF16_LEAD(0x1d15f), UTF16_TRAIL(0x1d15f),
+        U16_LEAD(0x2f999), U16_TRAIL(0x2f999),
+        U16_LEAD(0x1d15f), U16_TRAIL(0x1d15f),
          0xc4,
          0x1ed0
      };
@@ -711,7 +712,7 @@ BasicNormalizerTest::TestPreviousNext() {
  
      // src and expect strings for regression test for j2911
      static const UChar src_j2911[]={
-        UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999),
+        U16_LEAD(0x2f999), U16_TRAIL(0x2f999),
          0xdd00, 0xd900, // unpaired surrogates - regression test for j2911
          0xc4,
          0x4f, 0x302, 0x301
diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp

index c6c90a371ca4956b811cd5877962cb9794d9e765..13251a35bfda363c1d65e8456c4bfa1182e15cc3 100644 (file)
--- a/icu4c/source/test/intltest/ustrtest.cpp
+++ b/icu4c/source/test/intltest/ustrtest.cpp
@@ -13,6 +13,7 @@
  #include "unicode/locid.h"
  #include "unicode/ucnv.h"
  #include "unicode/uenum.h"
+#include "unicode/utf16.h"
  #include "cmemory.h"
  #include "charstr.h"
  
@@ -1275,7 +1276,7 @@ UnicodeStringTest::TestStackAllocation()
  
      // test the UChar32 constructor
      UnicodeString c32Test((UChar32)0x10ff2a);
-    if( c32Test.length() != UTF_CHAR_LENGTH(0x10ff2a) ||
+    if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
          c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
      ) {
          errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
@@ -1283,7 +1284,7 @@ UnicodeStringTest::TestStackAllocation()
  
      // test the (new) capacity constructor
      UnicodeString capTest(5, (UChar32)0x2a, 5);
-    if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x2a) ||
+    if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
          capTest.char32At(0) != 0x2a ||
          capTest.char32At(4) != 0x2a
      ) {
@@ -1291,7 +1292,7 @@ UnicodeStringTest::TestStackAllocation()
      }
  
      capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
-    if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x10ff2a) ||
+    if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
          capTest.char32At(0) != 0x10ff2a ||
          capTest.char32At(4) != 0x10ff2a
      ) {
diff --git a/icu4c/source/tools/dumpce/dumpce.cpp b/icu4c/source/tools/dumpce/dumpce.cpp

index 236e3799942133900b79ff669138b5acf3af47eb..b95ca087adc1ad2cc0d6d98f8973b048964f6b68 100644 (file)
--- a/icu4c/source/tools/dumpce/dumpce.cpp
+++ b/icu4c/source/tools/dumpce/dumpce.cpp
@@ -873,7 +873,7 @@ void outputScriptElem(ScriptElement &element, int compare, UBool expansion)
      while (i < element.count) {
          char    str[128];
          UChar32 codepoint;
-        UTF_NEXT_CHAR(element.ch, i, element.count, codepoint);
+        U16_NEXT(element.ch, i, element.count, codepoint);
          int32_t temp = u_charName(codepoint, U_UNICODE_CHAR_NAME, str, 128, 
                                        &error);
          if (U_FAILURE(error)) {
@@ -938,7 +938,7 @@ inline UBool checkInScripts(UScriptCode script[], int scriptcount,
      int i = 0;
      while (i < scriptelem.count) {
          UChar32     codepoint;
-        UTF_NEXT_CHAR(scriptelem.ch, i, scriptelem.count, codepoint);
+        U16_NEXT(scriptelem.ch, i, scriptelem.count, codepoint);
          UErrorCode  error = U_ZERO_ERROR;
          if (checkInScripts(script, scriptcount, codepoint)) {
              return TRUE;
diff --git a/icu4c/source/tools/gensprep/store.c b/icu4c/source/tools/gensprep/store.c

index d2ec374121837a80ca439e7252d98e7cef2b6122..6da060184d79993f2e1061a160f75c9550c4da77 100644 (file)
--- a/icu4c/source/tools/gensprep/store.c
+++ b/icu4c/source/tools/gensprep/store.c
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1999-2009, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -22,6 +22,7 @@
  #include "cstring.h"
  #include "filestrm.h"
  #include "unicode/udata.h"
+#include "unicode/utf16.h"
  #include "utrie.h"
  #include "unewdata.h"
  #include "gensprep.h"
@@ -467,8 +468,8 @@ storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
          if(mapping[i] <= 0xFFFF){
              map[i] = (uint16_t)mapping[i];
          }else{
-            map[i]   = UTF16_LEAD(mapping[i]);
-            map[i+1] = UTF16_TRAIL(mapping[i]);
+            map[i]   = U16_LEAD(mapping[i]);
+            map[i+1] = U16_TRAIL(mapping[i]);
          }
          i++;
      }
diff --git a/icu4c/source/tools/toolutil/ucbuf.c b/icu4c/source/tools/toolutil/ucbuf.c

index 5194ea6f76ffdc664ec4e66fae3952d5c526150c..0417c1d7287177e9a7d50cd6708c9475fd15796a 100644 (file)
--- a/icu4c/source/tools/toolutil/ucbuf.c
+++ b/icu4c/source/tools/toolutil/ucbuf.c
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1998-2008, International Business Machines
+*   Copyright (C) 1998-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -17,14 +17,15 @@
  
  #include "unicode/utypes.h"
  #include "unicode/putil.h"
+#include "unicode/uchar.h"
  #include "unicode/ucnv.h"
  #include "unicode/ucnv_err.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "filestrm.h"
  #include "cstring.h"
  #include "cmemory.h"
  #include "ustrfmt.h"
-#include "unicode/ustring.h"
-#include "unicode/uchar.h"
  #include "ucbuf.h"
  #include <stdio.h>
  
@@ -353,8 +354,8 @@ ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
              return U_EOF;
          }
      }
-    if(UTF_IS_LEAD(*(buf->currentPos))){
-        retVal=UTF16_GET_PAIR_VALUE(buf->currentPos[0],buf->currentPos[1]);
+    if(U16_IS_LEAD(*(buf->currentPos))){
+        retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
          buf->currentPos+=2;
      }else{
          retVal = *(buf->currentPos++);
diff --git a/icu4c/source/tools/toolutil/uparse.c b/icu4c/source/tools/toolutil/uparse.c

index 9902c5c277bdd234b830b37f8cbe50bc6727da06..3d098cbfac81b58096c0e6bf2dbdb3442c85ff48 100644 (file)
--- a/icu4c/source/tools/toolutil/uparse.c
+++ b/icu4c/source/tools/toolutil/uparse.c
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2000-2010, International Business Machines
+*   Copyright (C) 2000-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -19,11 +19,12 @@
  */
  
  #include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
  #include "cstring.h"
  #include "filestrm.h"
  #include "uparse.h"
-#include "unicode/uchar.h"
-#include "unicode/ustring.h"
  #include "ustr_imp.h"
  
  #include <stdio.h>
author	Markus Scherer <markus.icu@gmail.com>
	Wed, 27 Jul 2011 05:53:56 +0000 (05:53 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Wed, 27 Jul 2011 05:53:56 +0000 (05:53 +0000)
icu4c/readme.html		patch \| blob \| history
icu4c/source/common/appendable.cpp		patch \| blob \| history
icu4c/source/common/bmpset.cpp		patch \| blob \| history
icu4c/source/common/caniter.cpp		patch \| blob \| history
icu4c/source/common/messagepattern.cpp		patch \| blob \| history
icu4c/source/common/normalizer2impl.cpp		patch \| blob \| history
icu4c/source/common/normalizer2impl.h		patch \| blob \| history
icu4c/source/common/normlzr.cpp		patch \| blob \| history
icu4c/source/common/punycode.cpp		patch \| blob \| history
icu4c/source/common/ruleiter.cpp		patch \| blob \| history
icu4c/source/common/ubidi.c		patch \| blob \| history
icu4c/source/common/ubidiwrt.c		patch \| blob \| history
icu4c/source/common/ucase.cpp		patch \| blob \| history
icu4c/source/common/ucasemap.cpp		patch \| blob \| history
icu4c/source/common/ucharstrie.cpp		patch \| blob \| history
icu4c/source/common/uchriter.cpp		patch \| blob \| history
icu4c/source/common/ucnv.c		patch \| blob \| history
icu4c/source/common/ucnv2022.cpp		patch \| blob \| history
icu4c/source/common/ucnv_bld.h		patch \| blob \| history
icu4c/source/common/ucnv_ct.c		patch \| blob \| history
icu4c/source/common/ucnv_u32.c		patch \| blob \| history
icu4c/source/common/ucnv_u8.c		patch \| blob \| history
icu4c/source/common/ucnvbocu.cpp		patch \| blob \| history
icu4c/source/common/ucnvhz.c		patch \| blob \| history
icu4c/source/common/ucnvisci.c		patch \| blob \| history
icu4c/source/common/ucnvlat1.c		patch \| blob \| history
icu4c/source/common/ucnvmbcs.c		patch \| blob \| history
icu4c/source/common/ucnvscsu.c		patch \| blob \| history
icu4c/source/common/uiter.cpp		patch \| blob \| history
icu4c/source/common/unames.cpp		patch \| blob \| history
icu4c/source/common/unicode/ucharstrie.h		patch \| blob \| history
icu4c/source/common/unicode/uconfig.h		patch \| blob \| history
icu4c/source/common/unicode/unistr.h		patch \| blob \| history
icu4c/source/common/unicode/utf.h		patch \| blob \| history
icu4c/source/common/unicode/utf16.h		patch \| blob \| history
icu4c/source/common/unicode/utf8.h		patch \| blob \| history
icu4c/source/common/unicode/utf_old.h		patch \| blob \| history
icu4c/source/common/unicode/utypes.h		patch \| blob \| history
icu4c/source/common/unifilt.cpp		patch \| blob \| history
icu4c/source/common/uniset.cpp		patch \| blob \| history
icu4c/source/common/unisetspan.cpp		patch \| blob \| history
icu4c/source/common/unistr.cpp		patch \| blob \| history
icu4c/source/common/unistr_props.cpp		patch \| blob \| history
icu4c/source/common/unorm_it.c		patch \| blob \| history
icu4c/source/common/uresdata.c		patch \| blob \| history
icu4c/source/common/ushape.cpp		patch \| blob \| history
icu4c/source/common/ustrcase.cpp		patch \| blob \| history
icu4c/source/common/ustring.cpp		patch \| blob \| history
icu4c/source/common/ustrtrns.cpp		patch \| blob \| history
icu4c/source/common/utext.cpp		patch \| blob \| history
icu4c/source/common/utf_impl.c		patch \| blob \| history
icu4c/source/common/util.cpp		patch \| blob \| history
icu4c/source/common/util_props.cpp		patch \| blob \| history
icu4c/source/common/utrie.h		patch \| blob \| history
icu4c/source/common/utrie2.cpp		patch \| blob \| history
icu4c/source/common/utrie2.h		patch \| blob \| history
icu4c/source/common/uts46.cpp		patch \| blob \| history
icu4c/source/i18n/bocsu.cpp		patch \| blob \| history
icu4c/source/i18n/casetrn.cpp		patch \| blob \| history
icu4c/source/i18n/decimfmt.cpp		patch \| blob \| history
icu4c/source/i18n/esctrn.cpp		patch \| blob \| history
icu4c/source/i18n/name2uni.cpp		patch \| blob \| history
icu4c/source/i18n/nortrans.cpp		patch \| blob \| history
icu4c/source/i18n/rbt_pars.cpp		patch \| blob \| history
icu4c/source/i18n/rbt_rule.cpp		patch \| blob \| history
icu4c/source/i18n/rbt_set.cpp		patch \| blob \| history
icu4c/source/i18n/regexcmp.cpp		patch \| blob \| history
icu4c/source/i18n/regextxt.cpp		patch \| blob \| history
icu4c/source/i18n/rematch.cpp		patch \| blob \| history
icu4c/source/i18n/smpdtfmt.cpp		patch \| blob \| history
icu4c/source/i18n/strmatch.cpp		patch \| blob \| history
icu4c/source/i18n/strrepl.cpp		patch \| blob \| history
icu4c/source/i18n/titletrn.cpp		patch \| blob \| history
icu4c/source/i18n/translit.cpp		patch \| blob \| history
icu4c/source/i18n/ucol_bld.cpp		patch \| blob \| history
icu4c/source/i18n/ucol_elm.cpp		patch \| blob \| history
icu4c/source/i18n/ucol_imp.h		patch \| blob \| history
icu4c/source/i18n/unesctrn.cpp		patch \| blob \| history
icu4c/source/i18n/uni2name.cpp		patch \| blob \| history
icu4c/source/i18n/uregex.cpp		patch \| blob \| history
icu4c/source/i18n/usearch.cpp		patch \| blob \| history
icu4c/source/i18n/uspoof.cpp		patch \| blob \| history
icu4c/source/i18n/uspoof_impl.cpp		patch \| blob \| history
icu4c/source/io/ufmt_cmn.h		patch \| blob \| history
icu4c/source/io/uprntf_p.c		patch \| blob \| history
icu4c/source/io/ustream.cpp		patch \| blob \| history
icu4c/source/test/cintltst/bocu1tst.c		patch \| blob \| history
icu4c/source/test/cintltst/cmsccoll.c		patch \| blob \| history
icu4c/source/test/cintltst/cnormtst.c		patch \| blob \| history
icu4c/source/test/cintltst/cucdtst.c		patch \| blob \| history
icu4c/source/test/cintltst/nccbtst.c		patch \| blob \| history
icu4c/source/test/cintltst/nucnvtst.c		patch \| blob \| history
icu4c/source/test/cintltst/sprpdata.c		patch \| blob \| history
icu4c/source/test/cintltst/trietest.c		patch \| blob \| history
icu4c/source/test/intltest/citrtest.cpp		patch \| blob \| history
icu4c/source/test/intltest/testidn.cpp		patch \| blob \| history
icu4c/source/test/intltest/transrt.cpp		patch \| blob \| history
icu4c/source/test/intltest/transtst.cpp		patch \| blob \| history
icu4c/source/test/intltest/tstnorm.cpp		patch \| blob \| history
icu4c/source/test/intltest/ustrtest.cpp		patch \| blob \| history
icu4c/source/tools/dumpce/dumpce.cpp		patch \| blob \| history
icu4c/source/tools/gensprep/store.c		patch \| blob \| history
icu4c/source/tools/toolutil/ucbuf.c		patch \| blob \| history
icu4c/source/tools/toolutil/uparse.c		patch \| blob \| history